]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/mount-util.c
pkgconfig: define variables relative to ${prefix}/${rootprefix}/${sysconfdir}
[thirdparty/systemd.git] / src / basic / mount-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <stdio_ext.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <sys/mount.h>
8 #include <sys/stat.h>
9 #include <sys/statvfs.h>
10 #include <unistd.h>
11
12 /* Include later */
13 #include <libmount.h>
14
15 #include "alloc-util.h"
16 #include "escape.h"
17 #include "extract-word.h"
18 #include "fd-util.h"
19 #include "fileio.h"
20 #include "fs-util.h"
21 #include "hashmap.h"
22 #include "mount-util.h"
23 #include "parse-util.h"
24 #include "path-util.h"
25 #include "set.h"
26 #include "stdio-util.h"
27 #include "string-util.h"
28 #include "strv.h"
29
30 /* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
31 * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
32 * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
33 * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
34 * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
35 * with large file handles anyway. */
36 #define ORIGINAL_MAX_HANDLE_SZ 128
37
38 int name_to_handle_at_loop(
39 int fd,
40 const char *path,
41 struct file_handle **ret_handle,
42 int *ret_mnt_id,
43 int flags) {
44
45 _cleanup_free_ struct file_handle *h = NULL;
46 size_t n = ORIGINAL_MAX_HANDLE_SZ;
47
48 /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
49 * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
50 * start value, it is not an upper bound on the buffer size required.
51 *
52 * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
53 * as NULL if there's no interest in either. */
54
55 for (;;) {
56 int mnt_id = -1;
57
58 h = malloc0(offsetof(struct file_handle, f_handle) + n);
59 if (!h)
60 return -ENOMEM;
61
62 h->handle_bytes = n;
63
64 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
65
66 if (ret_handle)
67 *ret_handle = TAKE_PTR(h);
68
69 if (ret_mnt_id)
70 *ret_mnt_id = mnt_id;
71
72 return 0;
73 }
74 if (errno != EOVERFLOW)
75 return -errno;
76
77 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
78
79 /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
80 * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
81 * be filled in, and the caller was interested in only the mount ID an nothing else. */
82
83 *ret_mnt_id = mnt_id;
84 return 0;
85 }
86
87 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
88 * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
89 * buffer. In that case propagate EOVERFLOW */
90 if (h->handle_bytes <= n)
91 return -EOVERFLOW;
92
93 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
94 n = h->handle_bytes;
95 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
96 return -EOVERFLOW;
97
98 h = mfree(h);
99 }
100 }
101
102 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
103 char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
104 _cleanup_free_ char *fdinfo = NULL;
105 _cleanup_close_ int subfd = -1;
106 char *p;
107 int r;
108
109 if ((flags & AT_EMPTY_PATH) && isempty(filename))
110 xsprintf(path, "/proc/self/fdinfo/%i", fd);
111 else {
112 subfd = openat(fd, filename, O_CLOEXEC|O_PATH);
113 if (subfd < 0)
114 return -errno;
115
116 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
117 }
118
119 r = read_full_file(path, &fdinfo, NULL);
120 if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
121 return -EOPNOTSUPP;
122 if (r < 0)
123 return r;
124
125 p = startswith(fdinfo, "mnt_id:");
126 if (!p) {
127 p = strstr(fdinfo, "\nmnt_id:");
128 if (!p) /* The mnt_id field is a relatively new addition */
129 return -EOPNOTSUPP;
130
131 p += 8;
132 }
133
134 p += strspn(p, WHITESPACE);
135 p[strcspn(p, WHITESPACE)] = 0;
136
137 return safe_atoi(p, mnt_id);
138 }
139
140 int fd_is_mount_point(int fd, const char *filename, int flags) {
141 _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
142 int mount_id = -1, mount_id_parent = -1;
143 bool nosupp = false, check_st_dev = true;
144 struct stat a, b;
145 int r;
146
147 assert(fd >= 0);
148 assert(filename);
149
150 /* First we will try the name_to_handle_at() syscall, which
151 * tells us the mount id and an opaque file "handle". It is
152 * not supported everywhere though (kernel compile-time
153 * option, not all file systems are hooked up). If it works
154 * the mount id is usually good enough to tell us whether
155 * something is a mount point.
156 *
157 * If that didn't work we will try to read the mount id from
158 * /proc/self/fdinfo/<fd>. This is almost as good as
159 * name_to_handle_at(), however, does not return the
160 * opaque file handle. The opaque file handle is pretty useful
161 * to detect the root directory, which we should always
162 * consider a mount point. Hence we use this only as
163 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
164 * kernel addition.
165 *
166 * As last fallback we do traditional fstat() based st_dev
167 * comparisons. This is how things were traditionally done,
168 * but unionfs breaks this since it exposes file
169 * systems with a variety of st_dev reported. Also, btrfs
170 * subvolumes have different st_dev, even though they aren't
171 * real mounts of their own. */
172
173 r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
174 if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
175 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
176 * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
177 * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
178 * (EINVAL): fall back to simpler logic. */
179 goto fallback_fdinfo;
180 else if (r == -EOPNOTSUPP)
181 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
182 * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
183 * logic */
184 nosupp = true;
185 else if (r < 0)
186 return r;
187
188 r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
189 if (r == -EOPNOTSUPP) {
190 if (nosupp)
191 /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
192 goto fallback_fdinfo;
193 else
194 /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
195 * it must be a mount point. */
196 return 1;
197 } else if (r < 0)
198 return r;
199
200 /* The parent can do name_to_handle_at() but the
201 * directory we are interested in can't? If so, it
202 * must be a mount point. */
203 if (nosupp)
204 return 1;
205
206 /* If the file handle for the directory we are
207 * interested in and its parent are identical, we
208 * assume this is the root directory, which is a mount
209 * point. */
210
211 if (h->handle_bytes == h_parent->handle_bytes &&
212 h->handle_type == h_parent->handle_type &&
213 memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
214 return 1;
215
216 return mount_id != mount_id_parent;
217
218 fallback_fdinfo:
219 r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
220 if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
221 goto fallback_fstat;
222 if (r < 0)
223 return r;
224
225 r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
226 if (r < 0)
227 return r;
228
229 if (mount_id != mount_id_parent)
230 return 1;
231
232 /* Hmm, so, the mount ids are the same. This leaves one
233 * special case though for the root file system. For that,
234 * let's see if the parent directory has the same inode as we
235 * are interested in. Hence, let's also do fstat() checks now,
236 * too, but avoid the st_dev comparisons, since they aren't
237 * that useful on unionfs mounts. */
238 check_st_dev = false;
239
240 fallback_fstat:
241 /* yay for fstatat() taking a different set of flags than the other
242 * _at() above */
243 if (flags & AT_SYMLINK_FOLLOW)
244 flags &= ~AT_SYMLINK_FOLLOW;
245 else
246 flags |= AT_SYMLINK_NOFOLLOW;
247 if (fstatat(fd, filename, &a, flags) < 0)
248 return -errno;
249
250 if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
251 return -errno;
252
253 /* A directory with same device and inode as its parent? Must
254 * be the root directory */
255 if (a.st_dev == b.st_dev &&
256 a.st_ino == b.st_ino)
257 return 1;
258
259 return check_st_dev && (a.st_dev != b.st_dev);
260 }
261
262 /* flags can be AT_SYMLINK_FOLLOW or 0 */
263 int path_is_mount_point(const char *t, const char *root, int flags) {
264 _cleanup_free_ char *canonical = NULL;
265 _cleanup_close_ int fd = -1;
266 int r;
267
268 assert(t);
269 assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
270
271 if (path_equal(t, "/"))
272 return 1;
273
274 /* we need to resolve symlinks manually, we can't just rely on
275 * fd_is_mount_point() to do that for us; if we have a structure like
276 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
277 * look at needs to be /usr, not /. */
278 if (flags & AT_SYMLINK_FOLLOW) {
279 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
280 if (r < 0)
281 return r;
282
283 t = canonical;
284 }
285
286 fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
287 if (fd < 0)
288 return -errno;
289
290 return fd_is_mount_point(fd, last_path_component(t), flags);
291 }
292
293 int path_get_mnt_id(const char *path, int *ret) {
294 int r;
295
296 r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
297 if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
298 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
299
300 return r;
301 }
302
303 int umount_recursive(const char *prefix, int flags) {
304 bool again;
305 int n = 0, r;
306
307 /* Try to umount everything recursively below a
308 * directory. Also, take care of stacked mounts, and keep
309 * unmounting them until they are gone. */
310
311 do {
312 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
313
314 again = false;
315 r = 0;
316
317 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
318 if (!proc_self_mountinfo)
319 return -errno;
320
321 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
322
323 for (;;) {
324 _cleanup_free_ char *path = NULL, *p = NULL;
325 int k;
326
327 k = fscanf(proc_self_mountinfo,
328 "%*s " /* (1) mount id */
329 "%*s " /* (2) parent id */
330 "%*s " /* (3) major:minor */
331 "%*s " /* (4) root */
332 "%ms " /* (5) mount point */
333 "%*s" /* (6) mount options */
334 "%*[^-]" /* (7) optional fields */
335 "- " /* (8) separator */
336 "%*s " /* (9) file system type */
337 "%*s" /* (10) mount source */
338 "%*s" /* (11) mount options 2 */
339 "%*[^\n]", /* some rubbish at the end */
340 &path);
341 if (k != 1) {
342 if (k == EOF)
343 break;
344
345 continue;
346 }
347
348 r = cunescape(path, UNESCAPE_RELAX, &p);
349 if (r < 0)
350 return r;
351
352 if (!path_startswith(p, prefix))
353 continue;
354
355 if (umount2(p, flags) < 0) {
356 r = log_debug_errno(errno, "Failed to umount %s: %m", p);
357 continue;
358 }
359
360 log_debug("Successfully unmounted %s", p);
361
362 again = true;
363 n++;
364
365 break;
366 }
367
368 } while (again);
369
370 return r ? r : n;
371 }
372
/* Stores the f_flag field reported by statvfs() for 'path' in *flags.
 * Returns 0 on success, -errno if statvfs() fails (in which case *flags is left untouched). */
static int get_mount_flags(const char *path, unsigned long *flags) {
        struct statvfs vfs;
        int rc;

        rc = statvfs(path, &vfs);
        if (rc < 0)
                return -errno;

        *flags = vfs.f_flag;
        return 0;
}
381
382 /* Use this function only if do you have direct access to /proc/self/mountinfo
383 * and need the caller to open it for you. This is the case when /proc is
384 * masked or not mounted. Otherwise, use bind_remount_recursive. */
385 int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) {
386 _cleanup_set_free_free_ Set *done = NULL;
387 _cleanup_free_ char *cleaned = NULL;
388 int r;
389
390 assert(proc_self_mountinfo);
391
392 /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
393 * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
394 * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
395 * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
396 * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
397 * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
398 * future submounts that have been triggered via autofs.
399 *
400 * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
401 * remount operation. Note that we'll ignore the blacklist for the top-level path. */
402
403 cleaned = strdup(prefix);
404 if (!cleaned)
405 return -ENOMEM;
406
407 path_simplify(cleaned, false);
408
409 done = set_new(&path_hash_ops);
410 if (!done)
411 return -ENOMEM;
412
413 for (;;) {
414 _cleanup_set_free_free_ Set *todo = NULL;
415 bool top_autofs = false;
416 char *x;
417 unsigned long orig_flags;
418
419 todo = set_new(&path_hash_ops);
420 if (!todo)
421 return -ENOMEM;
422
423 rewind(proc_self_mountinfo);
424
425 for (;;) {
426 _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
427 int k;
428
429 k = fscanf(proc_self_mountinfo,
430 "%*s " /* (1) mount id */
431 "%*s " /* (2) parent id */
432 "%*s " /* (3) major:minor */
433 "%*s " /* (4) root */
434 "%ms " /* (5) mount point */
435 "%*s" /* (6) mount options (superblock) */
436 "%*[^-]" /* (7) optional fields */
437 "- " /* (8) separator */
438 "%ms " /* (9) file system type */
439 "%*s" /* (10) mount source */
440 "%*s" /* (11) mount options (bind mount) */
441 "%*[^\n]", /* some rubbish at the end */
442 &path,
443 &type);
444 if (k != 2) {
445 if (k == EOF)
446 break;
447
448 continue;
449 }
450
451 r = cunescape(path, UNESCAPE_RELAX, &p);
452 if (r < 0)
453 return r;
454
455 if (!path_startswith(p, cleaned))
456 continue;
457
458 /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
459 * operate on. */
460 if (!path_equal(cleaned, p)) {
461 bool blacklisted = false;
462 char **i;
463
464 STRV_FOREACH(i, blacklist) {
465
466 if (path_equal(*i, cleaned))
467 continue;
468
469 if (!path_startswith(*i, cleaned))
470 continue;
471
472 if (path_startswith(p, *i)) {
473 blacklisted = true;
474 log_debug("Not remounting %s blacklisted by %s, called for %s", p, *i, cleaned);
475 break;
476 }
477 }
478 if (blacklisted)
479 continue;
480 }
481
482 /* Let's ignore autofs mounts. If they aren't
483 * triggered yet, we want to avoid triggering
484 * them, as we don't make any guarantees for
485 * future submounts anyway. If they are
486 * already triggered, then we will find
487 * another entry for this. */
488 if (streq(type, "autofs")) {
489 top_autofs = top_autofs || path_equal(cleaned, p);
490 continue;
491 }
492
493 if (!set_contains(done, p)) {
494 r = set_consume(todo, p);
495 p = NULL;
496 if (r == -EEXIST)
497 continue;
498 if (r < 0)
499 return r;
500 }
501 }
502
503 /* If we have no submounts to process anymore and if
504 * the root is either already done, or an autofs, we
505 * are done */
506 if (set_isempty(todo) &&
507 (top_autofs || set_contains(done, cleaned)))
508 return 0;
509
510 if (!set_contains(done, cleaned) &&
511 !set_contains(todo, cleaned)) {
512 /* The prefix directory itself is not yet a mount, make it one. */
513 if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
514 return -errno;
515
516 orig_flags = 0;
517 (void) get_mount_flags(cleaned, &orig_flags);
518 orig_flags &= ~MS_RDONLY;
519
520 if (mount(NULL, cleaned, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
521 return -errno;
522
523 log_debug("Made top-level directory %s a mount point.", prefix);
524
525 x = strdup(cleaned);
526 if (!x)
527 return -ENOMEM;
528
529 r = set_consume(done, x);
530 if (r < 0)
531 return r;
532 }
533
534 while ((x = set_steal_first(todo))) {
535
536 r = set_consume(done, x);
537 if (IN_SET(r, 0, -EEXIST))
538 continue;
539 if (r < 0)
540 return r;
541
542 /* Deal with mount points that are obstructed by a later mount */
543 r = path_is_mount_point(x, NULL, 0);
544 if (IN_SET(r, 0, -ENOENT))
545 continue;
546 if (IN_SET(r, -EACCES, -EPERM)) {
547 /* Even if root user invoke this, submounts under private FUSE or NFS mount points
548 * may not be acceessed. E.g.,
549 *
550 * $ bindfs --no-allow-other ~/mnt/mnt ~/mnt/mnt
551 * $ bindfs --no-allow-other ~/mnt ~/mnt
552 *
553 * Then, root user cannot access the mount point ~/mnt/mnt.
554 * In such cases, the submounts are ignored, as we have no way to manage them. */
555 log_debug_errno(r, "Failed to determine '%s' is mount point or not, ignoring: %m", x);
556 continue;
557 }
558 if (r < 0)
559 return r;
560
561 /* Try to reuse the original flag set */
562 orig_flags = 0;
563 (void) get_mount_flags(x, &orig_flags);
564 orig_flags &= ~MS_RDONLY;
565
566 if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
567 return -errno;
568
569 log_debug("Remounted %s read-only.", x);
570 }
571 }
572 }
573
574 int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
575 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
576
577 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
578 if (!proc_self_mountinfo)
579 return -errno;
580
581 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
582
583 return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, proc_self_mountinfo);
584 }
585
/* Makes the mount at 'path' the new root: changes into it, moves it over "/" with MS_MOVE, chroot()s into it and
 * finally changes to the new "/". Returns 0 on success, -errno of the first step that fails otherwise. */
int mount_move_root(const char *path) {
        assert(path);

        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                /* The chain stops at the first failing call, so errno still belongs to it. */
                return -errno;

        return 0;
}
603
/* Returns true if 'fstype' names one of the network file systems listed below. A leading "fuse." is stripped
 * before the comparison, so that e.g. "fuse.sshfs" is treated like "sshfs". */
bool fstype_is_network(const char *fstype) {
        static const char fuse_prefix[] = "fuse.";
        static const char *const network_types[] = {
                "afs",
                "cifs",
                "smbfs",
                "sshfs",
                "ncpfs",
                "ncp",
                "nfs",
                "nfs4",
                "gfs",
                "gfs2",
                "glusterfs",
                "pvfs2", /* OrangeFS */
                "ocfs2",
                "lustre",
        };
        size_t i;

        if (strncmp(fstype, fuse_prefix, sizeof(fuse_prefix) - 1) == 0)
                fstype += sizeof(fuse_prefix) - 1;

        for (i = 0; i < sizeof(network_types) / sizeof(network_types[0]); i++)
                if (strcmp(fstype, network_types[i]) == 0)
                        return true;

        return false;
}
627
/* Returns true if 'fstype' is one of the kernel API virtual file systems listed below. */
bool fstype_is_api_vfs(const char *fstype) {
        static const char *const api_types[] = {
                "autofs",
                "bpf",
                "cgroup",
                "cgroup2",
                "configfs",
                "cpuset",
                "debugfs",
                "devpts",
                "devtmpfs",
                "efivarfs",
                "fusectl",
                "hugetlbfs",
                "mqueue",
                "proc",
                "pstore",
                "ramfs",
                "securityfs",
                "sysfs",
                "tmpfs",
                "tracefs",
        };
        size_t i;

        for (i = 0; i < sizeof(api_types) / sizeof(api_types[0]); i++)
                if (strcmp(fstype, api_types[i]) == 0)
                        return true;

        return false;
}
651
/* Returns true if 'fstype' is one of the Linux file systems that are necessarily read-only. */
bool fstype_is_ro(const char *fstype) {
        static const char *const ro_types[] = {
                "DM_verity_hash",
                "iso9660",
                "squashfs",
        };
        size_t i;

        for (i = 0; i < sizeof(ro_types) / sizeof(ro_types[0]); i++)
                if (strcmp(fstype, ro_types[i]) == 0)
                        return true;

        return false;
}
659
/* Returns true if 'fstype' is one of the file systems listed below as being able to discard. */
bool fstype_can_discard(const char *fstype) {
        static const char *const discard_types[] = {
                "btrfs",
                "ext4",
                "vfat",
                "xfs",
        };
        size_t i;

        for (i = 0; i < sizeof(discard_types) / sizeof(discard_types[0]); i++)
                if (strcmp(fstype, discard_types[i]) == 0)
                        return true;

        return false;
}
667
/* Returns true if 'fstype' is a file system that has a uid=/gid= mount option which fixates the owners of all
 * files and directories, current and future. */
bool fstype_can_uid_gid(const char *fstype) {
        static const char *const uid_gid_types[] = {
                "adfs",
                "fat",
                "hfs",
                "hpfs",
                "iso9660",
                "msdos",
                "ntfs",
                "vfat",
        };
        size_t i;

        for (i = 0; i < sizeof(uid_gid_types) / sizeof(uid_gid_types[0]); i++)
                if (strcmp(fstype, uid_gid_types[i]) == 0)
                        return true;

        return false;
}
683
/* Removes all mounts stacked on the mount point 'path', by calling umount2() until it fails.
 *
 * Failing with EINVAL ends the loop normally: in that case the return value tells whether at least one unmount
 * succeeded (1) or none did (0). Any other failure is returned as -errno. */
int repeat_unmount(const char *path, int flags) {
        bool unmounted_any = false;

        assert(path);

        while (umount2(path, flags) >= 0)
                unmounted_any = true;

        if (errno == EINVAL)
                return unmounted_any;

        return -errno;
}
704
/* Maps the file type bits of 'mode' to the path of the matching inaccessible file node below
 * /run/systemd/inaccessible/. These nodes are created during early boot by PID 1.
 *
 * The character and block device nodes may be missing, since PID 1 may have lacked the privileges to create
 * them (maybe because we run in a userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy that
 * excludes device nodes with major and minor of 0). In that case the AF_UNIX socket node is returned instead,
 * which is not the same, but close enough for most uses: most importantly, the kernel allows bind mounts from
 * socket nodes to any non-directory file node.
 *
 * Returns NULL for file types not covered here. */
const char* mode_to_inaccessible_node(mode_t mode) {
        const mode_t type = mode & S_IFMT;
        const char *device_node;

        if (type == S_IFREG)
                return "/run/systemd/inaccessible/reg";
        if (type == S_IFDIR)
                return "/run/systemd/inaccessible/dir";
        if (type == S_IFIFO)
                return "/run/systemd/inaccessible/fifo";
        if (type == S_IFSOCK)
                return "/run/systemd/inaccessible/sock";

        if (type == S_IFCHR)
                device_node = "/run/systemd/inaccessible/chr";
        else if (type == S_IFBLK)
                device_node = "/run/systemd/inaccessible/blk";
        else
                return NULL;

        /* Only hand out the device node if it actually exists, otherwise use the socket node. */
        if (access(device_node, F_OK) == 0)
                return device_node;

        return "/run/systemd/inaccessible/sock";
}
738
739 #define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
740 static char* mount_flags_to_string(long unsigned flags) {
741 char *x;
742 _cleanup_free_ char *y = NULL;
743 long unsigned overflow;
744
745 overflow = flags & ~(MS_RDONLY |
746 MS_NOSUID |
747 MS_NODEV |
748 MS_NOEXEC |
749 MS_SYNCHRONOUS |
750 MS_REMOUNT |
751 MS_MANDLOCK |
752 MS_DIRSYNC |
753 MS_NOATIME |
754 MS_NODIRATIME |
755 MS_BIND |
756 MS_MOVE |
757 MS_REC |
758 MS_SILENT |
759 MS_POSIXACL |
760 MS_UNBINDABLE |
761 MS_PRIVATE |
762 MS_SLAVE |
763 MS_SHARED |
764 MS_RELATIME |
765 MS_KERNMOUNT |
766 MS_I_VERSION |
767 MS_STRICTATIME |
768 MS_LAZYTIME);
769
770 if (flags == 0 || overflow != 0)
771 if (asprintf(&y, "%lx", overflow) < 0)
772 return NULL;
773
774 x = strjoin(FLAG(MS_RDONLY),
775 FLAG(MS_NOSUID),
776 FLAG(MS_NODEV),
777 FLAG(MS_NOEXEC),
778 FLAG(MS_SYNCHRONOUS),
779 FLAG(MS_REMOUNT),
780 FLAG(MS_MANDLOCK),
781 FLAG(MS_DIRSYNC),
782 FLAG(MS_NOATIME),
783 FLAG(MS_NODIRATIME),
784 FLAG(MS_BIND),
785 FLAG(MS_MOVE),
786 FLAG(MS_REC),
787 FLAG(MS_SILENT),
788 FLAG(MS_POSIXACL),
789 FLAG(MS_UNBINDABLE),
790 FLAG(MS_PRIVATE),
791 FLAG(MS_SLAVE),
792 FLAG(MS_SHARED),
793 FLAG(MS_RELATIME),
794 FLAG(MS_KERNMOUNT),
795 FLAG(MS_I_VERSION),
796 FLAG(MS_STRICTATIME),
797 FLAG(MS_LAZYTIME),
798 y);
799 if (!x)
800 return NULL;
801 if (!y)
802 x[strlen(x) - 1] = '\0'; /* truncate the last | */
803 return x;
804 }
805
806 int mount_verbose(
807 int error_log_level,
808 const char *what,
809 const char *where,
810 const char *type,
811 unsigned long flags,
812 const char *options) {
813
814 _cleanup_free_ char *fl = NULL, *o = NULL;
815 unsigned long f;
816 int r;
817
818 r = mount_option_mangle(options, flags, &f, &o);
819 if (r < 0)
820 return log_full_errno(error_log_level, r,
821 "Failed to mangle mount options %s: %m",
822 strempty(options));
823
824 fl = mount_flags_to_string(f);
825
826 if ((f & MS_REMOUNT) && !what && !type)
827 log_debug("Remounting %s (%s \"%s\")...",
828 where, strnull(fl), strempty(o));
829 else if (!what && !type)
830 log_debug("Mounting %s (%s \"%s\")...",
831 where, strnull(fl), strempty(o));
832 else if ((f & MS_BIND) && !type)
833 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
834 what, where, strnull(fl), strempty(o));
835 else if (f & MS_MOVE)
836 log_debug("Moving mount %s → %s (%s \"%s\")...",
837 what, where, strnull(fl), strempty(o));
838 else
839 log_debug("Mounting %s on %s (%s \"%s\")...",
840 strna(type), where, strnull(fl), strempty(o));
841 if (mount(what, where, type, f, o) < 0)
842 return log_full_errno(error_log_level, errno,
843 "Failed to mount %s (type %s) on %s (%s \"%s\"): %m",
844 strna(what), strna(type), where, strnull(fl), strempty(o));
845 return 0;
846 }
847
/* Like umount(), but logs the attempt at debug level and a failure at error level.
 * Returns 0 on success, or the value returned by log_error_errno() on failure. */
int umount_verbose(const char *what) {
        log_debug("Umounting %s...", what);

        if (umount(what) >= 0)
                return 0;

        return log_error_errno(errno, "Failed to unmount %s: %m", what);
}
854
/* Maps the propagation bits in 'flags' (MS_SHARED, MS_SLAVE, MS_PRIVATE; all other bits are ignored) to a name.
 * Returns "" if none of them is set, and NULL if more than one is set. */
const char *mount_propagation_flags_to_string(unsigned long flags) {
        const unsigned long propagation = flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE);

        if (propagation == 0)
                return "";
        if (propagation == MS_SHARED)
                return "shared";
        if (propagation == MS_SLAVE)
                return "slave";
        if (propagation == MS_PRIVATE)
                return "private";

        return NULL;
}
870
/* Parses a propagation mode name into the matching MS_* flag and stores it in *ret. A NULL or empty name yields
 * 0. Returns 0 on success, -EINVAL for an unknown name (in which case *ret is left untouched). */
int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
        static const struct {
                const char *name;
                unsigned long flag;
        } modes[] = {
                { "shared",  MS_SHARED  },
                { "slave",   MS_SLAVE   },
                { "private", MS_PRIVATE },
        };
        size_t i;

        if (!name || name[0] == '\0') {
                *ret = 0;
                return 0;
        }

        for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++)
                if (strcmp(name, modes[i].name) == 0) {
                        *ret = modes[i].flag;
                        return 0;
                }

        return -EINVAL;
}
885
886 int mount_option_mangle(
887 const char *options,
888 unsigned long mount_flags,
889 unsigned long *ret_mount_flags,
890 char **ret_remaining_options) {
891
892 const struct libmnt_optmap *map;
893 _cleanup_free_ char *ret = NULL;
894 const char *p;
895 int r;
896
897 /* This extracts mount flags from the mount options, and store
898 * non-mount-flag options to '*ret_remaining_options'.
899 * E.g.,
900 * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
901 * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
902 * "size=1630748k,mode=700,uid=1000,gid=1000".
903 * See more examples in test-mount-utils.c.
904 *
905 * Note that if 'options' does not contain any non-mount-flag options,
906 * then '*ret_remaining_options' is set to NULL instread of empty string.
907 * Note that this does not check validity of options stored in
908 * '*ret_remaining_options'.
909 * Note that if 'options' is NULL, then this just copies 'mount_flags'
910 * to '*ret_mount_flags'. */
911
912 assert(ret_mount_flags);
913 assert(ret_remaining_options);
914
915 map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
916 if (!map)
917 return -EINVAL;
918
919 p = options;
920 for (;;) {
921 _cleanup_free_ char *word = NULL;
922 const struct libmnt_optmap *ent;
923
924 r = extract_first_word(&p, &word, ",", EXTRACT_QUOTES);
925 if (r < 0)
926 return r;
927 if (r == 0)
928 break;
929
930 for (ent = map; ent->name; ent++) {
931 /* All entries in MNT_LINUX_MAP do not take any argument.
932 * Thus, ent->name does not contain "=" or "[=]". */
933 if (!streq(word, ent->name))
934 continue;
935
936 if (!(ent->mask & MNT_INVERT))
937 mount_flags |= ent->id;
938 else if (mount_flags & ent->id)
939 mount_flags ^= ent->id;
940
941 break;
942 }
943
944 /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
945 if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
946 return -ENOMEM;
947 }
948
949 *ret_mount_flags = mount_flags;
950 *ret_remaining_options = TAKE_PTR(ret);
951
952 return 0;
953 }
954
955 int dev_is_devtmpfs(void) {
956 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
957 int mount_id, r;
958 char *e;
959
960 r = path_get_mnt_id("/dev", &mount_id);
961 if (r < 0)
962 return r;
963
964 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
965 if (!proc_self_mountinfo)
966 return -errno;
967
968 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
969
970 for (;;) {
971 _cleanup_free_ char *line = NULL;
972 int mid;
973
974 r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
975 if (r < 0)
976 return r;
977 if (r == 0)
978 break;
979
980 if (sscanf(line, "%i", &mid) != 1)
981 continue;
982
983 if (mid != mount_id)
984 continue;
985
986 e = strstr(line, " - ");
987 if (!e)
988 continue;
989
990 /* accept any name that starts with the currently expected type */
991 if (startswith(e + 3, "devtmpfs"))
992 return true;
993 }
994
995 return false;
996 }