]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/mount-util.c
tree-wide: drop string.h when string-util.h or friends are included
[thirdparty/systemd.git] / src / shared / mount-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <stdlib.h>
5 #include <sys/mount.h>
6 #include <sys/stat.h>
7 #include <sys/statvfs.h>
8 #include <unistd.h>
9
10 #include "alloc-util.h"
11 #include "extract-word.h"
12 #include "fd-util.h"
13 #include "fileio.h"
14 #include "fs-util.h"
15 #include "hashmap.h"
16 #include "libmount-util.h"
17 #include "mount-util.h"
18 #include "mountpoint-util.h"
19 #include "parse-util.h"
20 #include "path-util.h"
21 #include "set.h"
22 #include "stdio-util.h"
23 #include "string-util.h"
24 #include "strv.h"
25
26 int umount_recursive(const char *prefix, int flags) {
27 int n = 0, r;
28 bool again;
29
30 /* Try to umount everything recursively below a
31 * directory. Also, take care of stacked mounts, and keep
32 * unmounting them until they are gone. */
33
34 do {
35 _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
36 _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
37
38 again = false;
39
40 r = libmount_parse("/proc/self/mountinfo", NULL, &table, &iter);
41 if (r < 0)
42 return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
43
44 for (;;) {
45 struct libmnt_fs *fs;
46 const char *path;
47
48 r = mnt_table_next_fs(table, iter, &fs);
49 if (r == 1)
50 break;
51 if (r < 0)
52 return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
53
54 path = mnt_fs_get_target(fs);
55 if (!path)
56 continue;
57
58 if (!path_startswith(path, prefix))
59 continue;
60
61 if (umount2(path, flags) < 0) {
62 r = log_debug_errno(errno, "Failed to umount %s: %m", path);
63 continue;
64 }
65
66 log_debug("Successfully unmounted %s", path);
67
68 again = true;
69 n++;
70
71 break;
72 }
73
74 } while (again);
75
76 return n;
77 }
78
79 /* Get the mount flags for the mountpoint at "path" from "table" */
80 static int get_mount_flags(const char *path, unsigned long *flags, struct libmnt_table *table) {
81 struct statvfs buf = {};
82 struct libmnt_fs *fs = NULL;
83 const char *opts = NULL;
84 int r = 0;
85
86 fs = mnt_table_find_target(table, path, MNT_ITER_FORWARD);
87 if (!fs) {
88 log_warning("Could not find '%s' in mount table", path);
89 goto fallback;
90 }
91
92 opts = mnt_fs_get_vfs_options(fs);
93 r = mnt_optstr_get_flags(opts, flags, mnt_get_builtin_optmap(MNT_LINUX_MAP));
94 if (r != 0) {
95 log_warning_errno(r, "Could not get flags for '%s': %m", path);
96 goto fallback;
97 }
98
99 /* relatime is default and trying to set it in an unprivileged container causes EPERM */
100 *flags &= ~MS_RELATIME;
101 return 0;
102
103 fallback:
104 if (statvfs(path, &buf) < 0)
105 return -errno;
106
107 *flags = buf.f_flag;
108 return 0;
109 }
110
111 /* Use this function only if you do not have direct access to /proc/self/mountinfo but the caller can open it
112 * for you. This is the case when /proc is masked or not mounted. Otherwise, use bind_remount_recursive. */
113 int bind_remount_recursive_with_mountinfo(
114 const char *prefix,
115 unsigned long new_flags,
116 unsigned long flags_mask,
117 char **blacklist,
118 FILE *proc_self_mountinfo) {
119
120 _cleanup_set_free_free_ Set *done = NULL;
121 _cleanup_free_ char *cleaned = NULL;
122 int r;
123
124 assert(proc_self_mountinfo);
125
126 /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
127 * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
128 * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
129 * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
130 * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
131 * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
132 * future submounts that have been triggered via autofs.
133 *
134 * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
135 * remount operation. Note that we'll ignore the blacklist for the top-level path. */
136
137 cleaned = strdup(prefix);
138 if (!cleaned)
139 return -ENOMEM;
140
141 path_simplify(cleaned, false);
142
143 done = set_new(&path_hash_ops);
144 if (!done)
145 return -ENOMEM;
146
147 for (;;) {
148 _cleanup_set_free_free_ Set *todo = NULL;
149 _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
150 _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
151 bool top_autofs = false;
152 char *x;
153 unsigned long orig_flags;
154
155 todo = set_new(&path_hash_ops);
156 if (!todo)
157 return -ENOMEM;
158
159 rewind(proc_self_mountinfo);
160
161 r = libmount_parse("/proc/self/mountinfo", proc_self_mountinfo, &table, &iter);
162 if (r < 0)
163 return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
164
165 for (;;) {
166 struct libmnt_fs *fs;
167 const char *path, *type;
168
169 r = mnt_table_next_fs(table, iter, &fs);
170 if (r == 1)
171 break;
172 if (r < 0)
173 return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
174
175 path = mnt_fs_get_target(fs);
176 type = mnt_fs_get_fstype(fs);
177 if (!path || !type)
178 continue;
179
180 if (!path_startswith(path, cleaned))
181 continue;
182
183 /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount
184 * we shall operate on. */
185 if (!path_equal(path, cleaned)) {
186 bool blacklisted = false;
187 char **i;
188
189 STRV_FOREACH(i, blacklist) {
190 if (path_equal(*i, cleaned))
191 continue;
192
193 if (!path_startswith(*i, cleaned))
194 continue;
195
196 if (path_startswith(path, *i)) {
197 blacklisted = true;
198 log_debug("Not remounting %s blacklisted by %s, called for %s",
199 path, *i, cleaned);
200 break;
201 }
202 }
203 if (blacklisted)
204 continue;
205 }
206
207 /* Let's ignore autofs mounts. If they aren't
208 * triggered yet, we want to avoid triggering
209 * them, as we don't make any guarantees for
210 * future submounts anyway. If they are
211 * already triggered, then we will find
212 * another entry for this. */
213 if (streq(type, "autofs")) {
214 top_autofs = top_autofs || path_equal(path, cleaned);
215 continue;
216 }
217
218 if (!set_contains(done, path)) {
219 r = set_put_strdup(todo, path);
220 if (r < 0)
221 return r;
222 }
223 }
224
225 /* If we have no submounts to process anymore and if
226 * the root is either already done, or an autofs, we
227 * are done */
228 if (set_isempty(todo) &&
229 (top_autofs || set_contains(done, cleaned)))
230 return 0;
231
232 if (!set_contains(done, cleaned) &&
233 !set_contains(todo, cleaned)) {
234 /* The prefix directory itself is not yet a mount, make it one. */
235 if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
236 return -errno;
237
238 orig_flags = 0;
239 (void) get_mount_flags(cleaned, &orig_flags, table);
240 orig_flags &= ~MS_RDONLY;
241
242 if (mount(NULL, cleaned, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL) < 0)
243 return -errno;
244
245 log_debug("Made top-level directory %s a mount point.", prefix);
246
247 r = set_put_strdup(done, cleaned);
248 if (r < 0)
249 return r;
250 }
251
252 while ((x = set_steal_first(todo))) {
253
254 r = set_consume(done, x);
255 if (IN_SET(r, 0, -EEXIST))
256 continue;
257 if (r < 0)
258 return r;
259
260 /* Deal with mount points that are obstructed by a later mount */
261 r = path_is_mount_point(x, NULL, 0);
262 if (IN_SET(r, 0, -ENOENT))
263 continue;
264 if (IN_SET(r, -EACCES, -EPERM)) {
265 /* Even if root user invoke this, submounts under private FUSE or NFS mount points
266 * may not be acceessed. E.g.,
267 *
268 * $ bindfs --no-allow-other ~/mnt/mnt ~/mnt/mnt
269 * $ bindfs --no-allow-other ~/mnt ~/mnt
270 *
271 * Then, root user cannot access the mount point ~/mnt/mnt.
272 * In such cases, the submounts are ignored, as we have no way to manage them. */
273 log_debug_errno(r, "Failed to determine '%s' is mount point or not, ignoring: %m", x);
274 continue;
275 }
276 if (r < 0)
277 return r;
278
279 /* Try to reuse the original flag set */
280 orig_flags = 0;
281 (void) get_mount_flags(x, &orig_flags, table);
282 orig_flags &= ~MS_RDONLY;
283
284 if (mount(NULL, x, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL) < 0)
285 return -errno;
286
287 log_debug("Remounted %s read-only.", x);
288 }
289 }
290 }
291
292 int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **blacklist) {
293 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
294 int r;
295
296 r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
297 if (r < 0)
298 return r;
299
300 return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, blacklist, proc_self_mountinfo);
301 }
302
/* Makes the mount at 'path' the new root: changes into 'path', moves that mount onto "/" with MS_MOVE,
 * chroot()s into the current directory and finally changes the working directory to "/".
 *
 * Returns 0 on success, or the negative errno of the first step that failed. */
int mount_move_root(const char *path) {
        assert(path);

        /* The steps are evaluated left to right, and evaluation stops at the first failing one, so
         * that errno still refers to that step when we return it. */
        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}
320
321 int repeat_unmount(const char *path, int flags) {
322 bool done = false;
323
324 assert(path);
325
326 /* If there are multiple mounts on a mount point, this
327 * removes them all */
328
329 for (;;) {
330 if (umount2(path, flags) < 0) {
331
332 if (errno == EINVAL)
333 return done;
334
335 return -errno;
336 }
337
338 done = true;
339 }
340 }
341
/* Maps a node type (the S_IFMT bits of 'mode') to the path of a corresponding inaccessible file node.
 * These nodes are created during early boot by PID 1. In some cases we lacked the privs to create the
 * character and block devices (maybe because we run in an userns environment, or miss CAP_SYS_MKNOD, or
 * run with a devices policy that excludes device nodes with major and minor of 0), but that's fine, in
 * that case we use an AF_UNIX file node instead, which is not the same, but close enough for most uses.
 * And most importantly, the kernel allows bind mounts from socket nodes to any non-directory file nodes,
 * and that's the most important thing that matters.
 *
 * Returns a constant string, or NULL for node types not handled here. */
const char* mode_to_inaccessible_node(mode_t mode) {
        mode_t node_type = mode & S_IFMT;

        if (node_type == S_IFREG)
                return "/run/systemd/inaccessible/reg";

        if (node_type == S_IFDIR)
                return "/run/systemd/inaccessible/dir";

        /* For device nodes, fall back to the socket node if the device node does not exist. */
        if (node_type == S_IFCHR)
                return access("/run/systemd/inaccessible/chr", F_OK) == 0 ?
                        "/run/systemd/inaccessible/chr" :
                        "/run/systemd/inaccessible/sock";

        if (node_type == S_IFBLK)
                return access("/run/systemd/inaccessible/blk", F_OK) == 0 ?
                        "/run/systemd/inaccessible/blk" :
                        "/run/systemd/inaccessible/sock";

        if (node_type == S_IFIFO)
                return "/run/systemd/inaccessible/fifo";

        if (node_type == S_IFSOCK)
                return "/run/systemd/inaccessible/sock";

        return NULL;
}
375
376 #define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
377 static char* mount_flags_to_string(long unsigned flags) {
378 char *x;
379 _cleanup_free_ char *y = NULL;
380 long unsigned overflow;
381
382 overflow = flags & ~(MS_RDONLY |
383 MS_NOSUID |
384 MS_NODEV |
385 MS_NOEXEC |
386 MS_SYNCHRONOUS |
387 MS_REMOUNT |
388 MS_MANDLOCK |
389 MS_DIRSYNC |
390 MS_NOATIME |
391 MS_NODIRATIME |
392 MS_BIND |
393 MS_MOVE |
394 MS_REC |
395 MS_SILENT |
396 MS_POSIXACL |
397 MS_UNBINDABLE |
398 MS_PRIVATE |
399 MS_SLAVE |
400 MS_SHARED |
401 MS_RELATIME |
402 MS_KERNMOUNT |
403 MS_I_VERSION |
404 MS_STRICTATIME |
405 MS_LAZYTIME);
406
407 if (flags == 0 || overflow != 0)
408 if (asprintf(&y, "%lx", overflow) < 0)
409 return NULL;
410
411 x = strjoin(FLAG(MS_RDONLY),
412 FLAG(MS_NOSUID),
413 FLAG(MS_NODEV),
414 FLAG(MS_NOEXEC),
415 FLAG(MS_SYNCHRONOUS),
416 FLAG(MS_REMOUNT),
417 FLAG(MS_MANDLOCK),
418 FLAG(MS_DIRSYNC),
419 FLAG(MS_NOATIME),
420 FLAG(MS_NODIRATIME),
421 FLAG(MS_BIND),
422 FLAG(MS_MOVE),
423 FLAG(MS_REC),
424 FLAG(MS_SILENT),
425 FLAG(MS_POSIXACL),
426 FLAG(MS_UNBINDABLE),
427 FLAG(MS_PRIVATE),
428 FLAG(MS_SLAVE),
429 FLAG(MS_SHARED),
430 FLAG(MS_RELATIME),
431 FLAG(MS_KERNMOUNT),
432 FLAG(MS_I_VERSION),
433 FLAG(MS_STRICTATIME),
434 FLAG(MS_LAZYTIME),
435 y);
436 if (!x)
437 return NULL;
438 if (!y)
439 x[strlen(x) - 1] = '\0'; /* truncate the last | */
440 return x;
441 }
442
443 int mount_verbose(
444 int error_log_level,
445 const char *what,
446 const char *where,
447 const char *type,
448 unsigned long flags,
449 const char *options) {
450
451 _cleanup_free_ char *fl = NULL, *o = NULL;
452 unsigned long f;
453 int r;
454
455 r = mount_option_mangle(options, flags, &f, &o);
456 if (r < 0)
457 return log_full_errno(error_log_level, r,
458 "Failed to mangle mount options %s: %m",
459 strempty(options));
460
461 fl = mount_flags_to_string(f);
462
463 if ((f & MS_REMOUNT) && !what && !type)
464 log_debug("Remounting %s (%s \"%s\")...",
465 where, strnull(fl), strempty(o));
466 else if (!what && !type)
467 log_debug("Mounting %s (%s \"%s\")...",
468 where, strnull(fl), strempty(o));
469 else if ((f & MS_BIND) && !type)
470 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
471 what, where, strnull(fl), strempty(o));
472 else if (f & MS_MOVE)
473 log_debug("Moving mount %s → %s (%s \"%s\")...",
474 what, where, strnull(fl), strempty(o));
475 else
476 log_debug("Mounting %s on %s (%s \"%s\")...",
477 strna(type), where, strnull(fl), strempty(o));
478 if (mount(what, where, type, f, o) < 0)
479 return log_full_errno(error_log_level, errno,
480 "Failed to mount %s (type %s) on %s (%s \"%s\"): %m",
481 strna(what), strna(type), where, strnull(fl), strempty(o));
482 return 0;
483 }
484
/* Wrapper around umount() that announces the operation at debug level and logs a failure at error
 * level. Returns 0 on success, otherwise the value returned by the error log call. */
int umount_verbose(const char *what) {
        log_debug("Umounting %s...", what);

        if (umount(what) >= 0)
                return 0;

        return log_error_errno(errno, "Failed to unmount %s: %m", what);
}
491
492 int mount_option_mangle(
493 const char *options,
494 unsigned long mount_flags,
495 unsigned long *ret_mount_flags,
496 char **ret_remaining_options) {
497
498 const struct libmnt_optmap *map;
499 _cleanup_free_ char *ret = NULL;
500 const char *p;
501 int r;
502
503 /* This extracts mount flags from the mount options, and store
504 * non-mount-flag options to '*ret_remaining_options'.
505 * E.g.,
506 * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
507 * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
508 * "size=1630748k,mode=700,uid=1000,gid=1000".
509 * See more examples in test-mount-utils.c.
510 *
511 * Note that if 'options' does not contain any non-mount-flag options,
512 * then '*ret_remaining_options' is set to NULL instead of empty string.
513 * Note that this does not check validity of options stored in
514 * '*ret_remaining_options'.
515 * Note that if 'options' is NULL, then this just copies 'mount_flags'
516 * to '*ret_mount_flags'. */
517
518 assert(ret_mount_flags);
519 assert(ret_remaining_options);
520
521 map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
522 if (!map)
523 return -EINVAL;
524
525 p = options;
526 for (;;) {
527 _cleanup_free_ char *word = NULL;
528 const struct libmnt_optmap *ent;
529
530 r = extract_first_word(&p, &word, ",", EXTRACT_UNQUOTE);
531 if (r < 0)
532 return r;
533 if (r == 0)
534 break;
535
536 for (ent = map; ent->name; ent++) {
537 /* All entries in MNT_LINUX_MAP do not take any argument.
538 * Thus, ent->name does not contain "=" or "[=]". */
539 if (!streq(word, ent->name))
540 continue;
541
542 if (!(ent->mask & MNT_INVERT))
543 mount_flags |= ent->id;
544 else if (mount_flags & ent->id)
545 mount_flags ^= ent->id;
546
547 break;
548 }
549
550 /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
551 if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
552 return -ENOMEM;
553 }
554
555 *ret_mount_flags = mount_flags;
556 *ret_remaining_options = TAKE_PTR(ret);
557
558 return 0;
559 }