src/basic/mount-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #include <errno.h>
  22 #include <stdlib.h>
  23 #include <string.h>
  24 #include <sys/mount.h>
  25 #include <sys/stat.h>
  26 #include <sys/statvfs.h>
  27 #include <unistd.h>
  28
  29 #include "alloc-util.h"
  30 #include "escape.h"
  31 #include "fd-util.h"
  32 #include "fileio.h"
  33 #include "fs-util.h"
  34 #include "hashmap.h"
  35 #include "mount-util.h"
  36 #include "parse-util.h"
  37 #include "path-util.h"
  38 #include "set.h"
  39 #include "stdio-util.h"
  40 #include "string-util.h"
  41 #include "strv.h"
  42
/* This is the original MAX_HANDLE_SZ definition from the kernel, as of when the API was introduced. We use it instead
 * of whatever value the current API headers define, to future-proof things: if the size is increased in the API
 * headers and our code is recompiled, it would cease working on old kernels, as those refuse any size larger than
 * this value with EINVAL right away. Hence, let's disconnect ourselves from any such API changes, and stick to the
 * original definition from when the API was introduced. We use it as a start value only anyway (see
 * name_to_handle_at_loop() below), and hence are still able to deal with larger file handles. */
#define ORIGINAL_MAX_HANDLE_SZ 128
  50
  51 int name_to_handle_at_loop(
  52                 int fd,
  53                 const char *path,
  54                 struct file_handle **ret_handle,
  55                 int *ret_mnt_id,
  56                 int flags) {
  57
  58         _cleanup_free_ struct file_handle *h;
  59         size_t n = ORIGINAL_MAX_HANDLE_SZ;
  60
  61         /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
  62          * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
  63          * start value, it is not an upper bound on the buffer size required.
  64          *
  65          * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
  66          * as NULL if there's no interest in either. */
  67
  68         h = malloc0(offsetof(struct file_handle, f_handle) + n);
  69         if (!h)
  70                 return -ENOMEM;
  71
  72         h->handle_bytes = n;
  73
  74         for (;;) {
  75                 int mnt_id = -1;
  76
  77                 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
  78
  79                         if (ret_handle) {
  80                                 *ret_handle = h;
  81                                 h = NULL;
  82                         }
  83
  84                         if (ret_mnt_id)
  85                                 *ret_mnt_id = mnt_id;
  86
  87                         return 0;
  88                 }
  89                 if (errno != EOVERFLOW)
  90                         return -errno;
  91
  92                 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
  93
  94                         /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
  95                          * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
  96                          * be filled in, and the caller was interested in only the mount ID an nothing else. */
  97
  98                         *ret_mnt_id = mnt_id;
  99                         return 0;
 100                 }
 101
 102                 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
 103                  * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
 104                  * buffer. In that case propagate EOVERFLOW */
 105                 if (h->handle_bytes <= n)
 106                         return -EOVERFLOW;
 107
 108                 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
 109                 n = h->handle_bytes;
 110                 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
 111                         return -EOVERFLOW;
 112
 113                 free(h);
 114                 h = malloc0(offsetof(struct file_handle, f_handle) + n);
 115                 if (!h)
 116                         return -ENOMEM;
 117
 118                 h->handle_bytes = n;
 119         }
 120 }
 121
 122 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
 123         char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
 124         _cleanup_free_ char *fdinfo = NULL;
 125         _cleanup_close_ int subfd = -1;
 126         char *p;
 127         int r;
 128
 129         if ((flags & AT_EMPTY_PATH) && isempty(filename))
 130                 xsprintf(path, "/proc/self/fdinfo/%i", fd);
 131         else {
 132                 subfd = openat(fd, filename, O_CLOEXEC|O_PATH);
 133                 if (subfd < 0)
 134                         return -errno;
 135
 136                 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
 137         }
 138
 139         r = read_full_file(path, &fdinfo, NULL);
 140         if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
 141                 return -EOPNOTSUPP;
 142         if (r < 0)
 143                 return r;
 144
 145         p = startswith(fdinfo, "mnt_id:");
 146         if (!p) {
 147                 p = strstr(fdinfo, "\nmnt_id:");
 148                 if (!p) /* The mnt_id field is a relatively new addition */
 149                         return -EOPNOTSUPP;
 150
 151                 p += 8;
 152         }
 153
 154         p += strspn(p, WHITESPACE);
 155         p[strcspn(p, WHITESPACE)] = 0;
 156
 157         return safe_atoi(p, mnt_id);
 158 }
 159
 160 int fd_is_mount_point(int fd, const char *filename, int flags) {
 161         _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
 162         int mount_id = -1, mount_id_parent = -1;
 163         bool nosupp = false, check_st_dev = true;
 164         struct stat a, b;
 165         int r;
 166
 167         assert(fd >= 0);
 168         assert(filename);
 169
 170         /* First we will try the name_to_handle_at() syscall, which
 171          * tells us the mount id and an opaque file "handle". It is
 172          * not supported everywhere though (kernel compile-time
 173          * option, not all file systems are hooked up). If it works
 174          * the mount id is usually good enough to tell us whether
 175          * something is a mount point.
 176          *
 177          * If that didn't work we will try to read the mount id from
 178          * /proc/self/fdinfo/<fd>. This is almost as good as
 179          * name_to_handle_at(), however, does not return the
 180          * opaque file handle. The opaque file handle is pretty useful
 181          * to detect the root directory, which we should always
 182          * consider a mount point. Hence we use this only as
 183          * fallback. Exporting the mnt_id in fdinfo is a pretty recent
 184          * kernel addition.
 185          *
 186          * As last fallback we do traditional fstat() based st_dev
 187          * comparisons. This is how things were traditionally done,
 188          * but unionfs breaks this since it exposes file
 189          * systems with a variety of st_dev reported. Also, btrfs
 190          * subvolumes have different st_dev, even though they aren't
 191          * real mounts of their own. */
 192
 193         r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
 194         if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
 195                 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
 196                  * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
 197                  * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
 198                  * (EINVAL): fall back to simpler logic. */
 199                 goto fallback_fdinfo;
 200         else if (r == -EOPNOTSUPP)
 201                 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
 202                  * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
 203                  * logic */
 204                 nosupp = true;
 205         else if (r < 0)
 206                 return r;
 207
 208         r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
 209         if (r == -EOPNOTSUPP) {
 210                 if (nosupp)
 211                         /* Neither parent nor child do name_to_handle_at()?  We have no choice but to fall back. */
 212                         goto fallback_fdinfo;
 213                 else
 214                         /* The parent can't do name_to_handle_at() but the directory we are interested in can?  If so,
 215                          * it must be a mount point. */
 216                         return 1;
 217         } else if (r < 0)
 218                 return r;
 219
 220         /* The parent can do name_to_handle_at() but the
 221          * directory we are interested in can't? If so, it
 222          * must be a mount point. */
 223         if (nosupp)
 224                 return 1;
 225
 226         /* If the file handle for the directory we are
 227          * interested in and its parent are identical, we
 228          * assume this is the root directory, which is a mount
 229          * point. */
 230
 231         if (h->handle_bytes == h_parent->handle_bytes &&
 232             h->handle_type == h_parent->handle_type &&
 233             memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
 234                 return 1;
 235
 236         return mount_id != mount_id_parent;
 237
 238 fallback_fdinfo:
 239         r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
 240         if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
 241                 goto fallback_fstat;
 242         if (r < 0)
 243                 return r;
 244
 245         r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
 246         if (r < 0)
 247                 return r;
 248
 249         if (mount_id != mount_id_parent)
 250                 return 1;
 251
 252         /* Hmm, so, the mount ids are the same. This leaves one
 253          * special case though for the root file system. For that,
 254          * let's see if the parent directory has the same inode as we
 255          * are interested in. Hence, let's also do fstat() checks now,
 256          * too, but avoid the st_dev comparisons, since they aren't
 257          * that useful on unionfs mounts. */
 258         check_st_dev = false;
 259
 260 fallback_fstat:
 261         /* yay for fstatat() taking a different set of flags than the other
 262          * _at() above */
 263         if (flags & AT_SYMLINK_FOLLOW)
 264                 flags &= ~AT_SYMLINK_FOLLOW;
 265         else
 266                 flags |= AT_SYMLINK_NOFOLLOW;
 267         if (fstatat(fd, filename, &a, flags) < 0)
 268                 return -errno;
 269
 270         if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
 271                 return -errno;
 272
 273         /* A directory with same device and inode as its parent? Must
 274          * be the root directory */
 275         if (a.st_dev == b.st_dev &&
 276             a.st_ino == b.st_ino)
 277                 return 1;
 278
 279         return check_st_dev && (a.st_dev != b.st_dev);
 280 }
 281
 282 /* flags can be AT_SYMLINK_FOLLOW or 0 */
 283 int path_is_mount_point(const char *t, const char *root, int flags) {
 284         _cleanup_free_ char *canonical = NULL, *parent = NULL;
 285         _cleanup_close_ int fd = -1;
 286         int r;
 287
 288         assert(t);
 289         assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
 290
 291         if (path_equal(t, "/"))
 292                 return 1;
 293
 294         /* we need to resolve symlinks manually, we can't just rely on
 295          * fd_is_mount_point() to do that for us; if we have a structure like
 296          * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
 297          * look at needs to be /usr, not /. */
 298         if (flags & AT_SYMLINK_FOLLOW) {
 299                 r = chase_symlinks(t, root, 0, &canonical);
 300                 if (r < 0)
 301                         return r;
 302
 303                 t = canonical;
 304         }
 305
 306         parent = dirname_malloc(t);
 307         if (!parent)
 308                 return -ENOMEM;
 309
 310         fd = openat(AT_FDCWD, parent, O_DIRECTORY|O_CLOEXEC|O_PATH);
 311         if (fd < 0)
 312                 return -errno;
 313
 314         return fd_is_mount_point(fd, last_path_component(t), flags);
 315 }
 316
 317 int path_get_mnt_id(const char *path, int *ret) {
 318         int r;
 319
 320         r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
 321         if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
 322                 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
 323
 324         return r;
 325 }
 326
 327 int umount_recursive(const char *prefix, int flags) {
 328         bool again;
 329         int n = 0, r;
 330
 331         /* Try to umount everything recursively below a
 332          * directory. Also, take care of stacked mounts, and keep
 333          * unmounting them until they are gone. */
 334
 335         do {
 336                 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 337
 338                 again = false;
 339                 r = 0;
 340
 341                 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 342                 if (!proc_self_mountinfo)
 343                         return -errno;
 344
 345                 for (;;) {
 346                         _cleanup_free_ char *path = NULL, *p = NULL;
 347                         int k;
 348
 349                         k = fscanf(proc_self_mountinfo,
 350                                    "%*s "       /* (1) mount id */
 351                                    "%*s "       /* (2) parent id */
 352                                    "%*s "       /* (3) major:minor */
 353                                    "%*s "       /* (4) root */
 354                                    "%ms "       /* (5) mount point */
 355                                    "%*s"        /* (6) mount options */
 356                                    "%*[^-]"     /* (7) optional fields */
 357                                    "- "         /* (8) separator */
 358                                    "%*s "       /* (9) file system type */
 359                                    "%*s"        /* (10) mount source */
 360                                    "%*s"        /* (11) mount options 2 */
 361                                    "%*[^\n]",   /* some rubbish at the end */
 362                                    &path);
 363                         if (k != 1) {
 364                                 if (k == EOF)
 365                                         break;
 366
 367                                 continue;
 368                         }
 369
 370                         r = cunescape(path, UNESCAPE_RELAX, &p);
 371                         if (r < 0)
 372                                 return r;
 373
 374                         if (!path_startswith(p, prefix))
 375                                 continue;
 376
 377                         if (umount2(p, flags) < 0) {
 378                                 r = log_debug_errno(errno, "Failed to umount %s: %m", p);
 379                                 continue;
 380                         }
 381
 382                         log_debug("Successfully unmounted %s", p);
 383
 384                         again = true;
 385                         n++;
 386
 387                         break;
 388                 }
 389
 390         } while (again);
 391
 392         return r ? r : n;
 393 }
 394
 395 static int get_mount_flags(const char *path, unsigned long *flags) {
 396         struct statvfs buf;
 397
 398         if (statvfs(path, &buf) < 0)
 399                 return -errno;
 400         *flags = buf.f_flag;
 401         return 0;
 402 }
 403
 404 /* Use this function only if do you have direct access to /proc/self/mountinfo
 405  * and need the caller to open it for you. This is the case when /proc is
 406  * masked or not mounted. Otherwise, use bind_remount_recursive. */
 407 int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) {
 408         _cleanup_set_free_free_ Set *done = NULL;
 409         _cleanup_free_ char *cleaned = NULL;
 410         int r;
 411
 412         assert(proc_self_mountinfo);
 413
 414         /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
 415          * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
 416          * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
 417          * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
 418          * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
 419          * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
 420          * future submounts that have been triggered via autofs.
 421          *
 422          * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
 423          * remount operation. Note that we'll ignore the blacklist for the top-level path. */
 424
 425         cleaned = strdup(prefix);
 426         if (!cleaned)
 427                 return -ENOMEM;
 428
 429         path_kill_slashes(cleaned);
 430
 431         done = set_new(&string_hash_ops);
 432         if (!done)
 433                 return -ENOMEM;
 434
 435         for (;;) {
 436                 _cleanup_set_free_free_ Set *todo = NULL;
 437                 bool top_autofs = false;
 438                 char *x;
 439                 unsigned long orig_flags;
 440
 441                 todo = set_new(&string_hash_ops);
 442                 if (!todo)
 443                         return -ENOMEM;
 444
 445                 rewind(proc_self_mountinfo);
 446
 447                 for (;;) {
 448                         _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
 449                         int k;
 450
 451                         k = fscanf(proc_self_mountinfo,
 452                                    "%*s "       /* (1) mount id */
 453                                    "%*s "       /* (2) parent id */
 454                                    "%*s "       /* (3) major:minor */
 455                                    "%*s "       /* (4) root */
 456                                    "%ms "       /* (5) mount point */
 457                                    "%*s"        /* (6) mount options (superblock) */
 458                                    "%*[^-]"     /* (7) optional fields */
 459                                    "- "         /* (8) separator */
 460                                    "%ms "       /* (9) file system type */
 461                                    "%*s"        /* (10) mount source */
 462                                    "%*s"        /* (11) mount options (bind mount) */
 463                                    "%*[^\n]",   /* some rubbish at the end */
 464                                    &path,
 465                                    &type);
 466                         if (k != 2) {
 467                                 if (k == EOF)
 468                                         break;
 469
 470                                 continue;
 471                         }
 472
 473                         r = cunescape(path, UNESCAPE_RELAX, &p);
 474                         if (r < 0)
 475                                 return r;
 476
 477                         if (!path_startswith(p, cleaned))
 478                                 continue;
 479
 480                         /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
 481                          * operate on. */
 482                         if (!path_equal(cleaned, p)) {
 483                                 bool blacklisted = false;
 484                                 char **i;
 485
 486                                 STRV_FOREACH(i, blacklist) {
 487
 488                                         if (path_equal(*i, cleaned))
 489                                                 continue;
 490
 491                                         if (!path_startswith(*i, cleaned))
 492                                                 continue;
 493
 494                                         if (path_startswith(p, *i)) {
 495                                                 blacklisted = true;
 496                                                 log_debug("Not remounting %s, because blacklisted by %s, called for %s", p, *i, cleaned);
 497                                                 break;
 498                                         }
 499                                 }
 500                                 if (blacklisted)
 501                                         continue;
 502                         }
 503
 504                         /* Let's ignore autofs mounts.  If they aren't
 505                          * triggered yet, we want to avoid triggering
 506                          * them, as we don't make any guarantees for
 507                          * future submounts anyway.  If they are
 508                          * already triggered, then we will find
 509                          * another entry for this. */
 510                         if (streq(type, "autofs")) {
 511                                 top_autofs = top_autofs || path_equal(cleaned, p);
 512                                 continue;
 513                         }
 514
 515                         if (!set_contains(done, p)) {
 516                                 r = set_consume(todo, p);
 517                                 p = NULL;
 518                                 if (r == -EEXIST)
 519                                         continue;
 520                                 if (r < 0)
 521                                         return r;
 522                         }
 523                 }
 524
 525                 /* If we have no submounts to process anymore and if
 526                  * the root is either already done, or an autofs, we
 527                  * are done */
 528                 if (set_isempty(todo) &&
 529                     (top_autofs || set_contains(done, cleaned)))
 530                         return 0;
 531
 532                 if (!set_contains(done, cleaned) &&
 533                     !set_contains(todo, cleaned)) {
 534                         /* The prefix directory itself is not yet a mount, make it one. */
 535                         if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
 536                                 return -errno;
 537
 538                         orig_flags = 0;
 539                         (void) get_mount_flags(cleaned, &orig_flags);
 540                         orig_flags &= ~MS_RDONLY;
 541
 542                         if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 543                                 return -errno;
 544
 545                         log_debug("Made top-level directory %s a mount point.", prefix);
 546
 547                         x = strdup(cleaned);
 548                         if (!x)
 549                                 return -ENOMEM;
 550
 551                         r = set_consume(done, x);
 552                         if (r < 0)
 553                                 return r;
 554                 }
 555
 556                 while ((x = set_steal_first(todo))) {
 557
 558                         r = set_consume(done, x);
 559                         if (IN_SET(r, 0, -EEXIST))
 560                                 continue;
 561                         if (r < 0)
 562                                 return r;
 563
 564                         /* Deal with mount points that are obstructed by a later mount */
 565                         r = path_is_mount_point(x, NULL, 0);
 566                         if (IN_SET(r, 0, -ENOENT))
 567                                 continue;
 568                         if (r < 0)
 569                                 return r;
 570
 571                         /* Try to reuse the original flag set */
 572                         orig_flags = 0;
 573                         (void) get_mount_flags(x, &orig_flags);
 574                         orig_flags &= ~MS_RDONLY;
 575
 576                         if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 577                                 return -errno;
 578
 579                         log_debug("Remounted %s read-only.", x);
 580                 }
 581         }
 582 }
 583
 584 int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
 585         _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 586
 587         proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 588         if (!proc_self_mountinfo)
 589                 return -errno;
 590
 591         return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, proc_self_mountinfo);
 592 }
 593
 594 int mount_move_root(const char *path) {
 595         assert(path);
 596
 597         if (chdir(path) < 0)
 598                 return -errno;
 599
 600         if (mount(path, "/", NULL, MS_MOVE, NULL) < 0)
 601                 return -errno;
 602
 603         if (chroot(".") < 0)
 604                 return -errno;
 605
 606         if (chdir("/") < 0)
 607                 return -errno;
 608
 609         return 0;
 610 }
 611
 612 bool fstype_is_network(const char *fstype) {
 613         const char *x;
 614
 615         x = startswith(fstype, "fuse.");
 616         if (x)
 617                 fstype = x;
 618
 619         return STR_IN_SET(fstype,
 620                           "afs",
 621                           "cifs",
 622                           "smbfs",
 623                           "sshfs",
 624                           "ncpfs",
 625                           "ncp",
 626                           "nfs",
 627                           "nfs4",
 628                           "gfs",
 629                           "gfs2",
 630                           "glusterfs",
 631                           "pvfs2", /* OrangeFS */
 632                           "ocfs2",
 633                           "lustre");
 634 }
 635
 636 bool fstype_is_api_vfs(const char *fstype) {
 637         return STR_IN_SET(fstype,
 638                           "autofs",
 639                           "bpf",
 640                           "cgroup",
 641                           "cgroup2",
 642                           "configfs",
 643                           "cpuset",
 644                           "debugfs",
 645                           "devpts",
 646                           "devtmpfs",
 647                           "efivarfs",
 648                           "fusectl",
 649                           "hugetlbfs",
 650                           "mqueue",
 651                           "proc",
 652                           "pstore",
 653                           "ramfs",
 654                           "securityfs",
 655                           "sysfs",
 656                           "tmpfs",
 657                           "tracefs");
 658 }
 659
 660 bool fstype_is_ro(const char *fstype) {
 661         /* All Linux file systems that are necessarily read-only */
 662         return STR_IN_SET(fstype,
 663                           "DM_verity_hash",
 664                           "iso9660",
 665                           "squashfs");
 666 }
 667
 668 bool fstype_can_discard(const char *fstype) {
 669         return STR_IN_SET(fstype,
 670                           "btrfs",
 671                           "ext4",
 672                           "vfat",
 673                           "xfs");
 674 }
 675
 676 int repeat_unmount(const char *path, int flags) {
 677         bool done = false;
 678
 679         assert(path);
 680
 681         /* If there are multiple mounts on a mount point, this
 682          * removes them all */
 683
 684         for (;;) {
 685                 if (umount2(path, flags) < 0) {
 686
 687                         if (errno == EINVAL)
 688                                 return done;
 689
 690                         return -errno;
 691                 }
 692
 693                 done = true;
 694         }
 695 }
 696
 697 const char* mode_to_inaccessible_node(mode_t mode) {
 698         /* This function maps a node type to the correspondent inaccessible node type.
 699          * Character and block inaccessible devices may not be created (because major=0 and minor=0),
 700          * in such case we map character and block devices to the inaccessible node type socket. */
 701         switch(mode & S_IFMT) {
 702                 case S_IFREG:
 703                         return "/run/systemd/inaccessible/reg";
 704                 case S_IFDIR:
 705                         return "/run/systemd/inaccessible/dir";
 706                 case S_IFCHR:
 707                         if (access("/run/systemd/inaccessible/chr", F_OK) == 0)
 708                                 return "/run/systemd/inaccessible/chr";
 709                         return "/run/systemd/inaccessible/sock";
 710                 case S_IFBLK:
 711                         if (access("/run/systemd/inaccessible/blk", F_OK) == 0)
 712                                 return "/run/systemd/inaccessible/blk";
 713                         return "/run/systemd/inaccessible/sock";
 714                 case S_IFIFO:
 715                         return "/run/systemd/inaccessible/fifo";
 716                 case S_IFSOCK:
 717                         return "/run/systemd/inaccessible/sock";
 718         }
 719         return NULL;
 720 }
 721
 722 #define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
 723 static char* mount_flags_to_string(long unsigned flags) {
 724         char *x;
 725         _cleanup_free_ char *y = NULL;
 726         long unsigned overflow;
 727
 728         overflow = flags & ~(MS_RDONLY |
 729                              MS_NOSUID |
 730                              MS_NODEV |
 731                              MS_NOEXEC |
 732                              MS_SYNCHRONOUS |
 733                              MS_REMOUNT |
 734                              MS_MANDLOCK |
 735                              MS_DIRSYNC |
 736                              MS_NOATIME |
 737                              MS_NODIRATIME |
 738                              MS_BIND |
 739                              MS_MOVE |
 740                              MS_REC |
 741                              MS_SILENT |
 742                              MS_POSIXACL |
 743                              MS_UNBINDABLE |
 744                              MS_PRIVATE |
 745                              MS_SLAVE |
 746                              MS_SHARED |
 747                              MS_RELATIME |
 748                              MS_KERNMOUNT |
 749                              MS_I_VERSION |
 750                              MS_STRICTATIME |
 751                              MS_LAZYTIME);
 752
 753         if (flags == 0 || overflow != 0)
 754                 if (asprintf(&y, "%lx", overflow) < 0)
 755                         return NULL;
 756
 757         x = strjoin(FLAG(MS_RDONLY),
 758                     FLAG(MS_NOSUID),
 759                     FLAG(MS_NODEV),
 760                     FLAG(MS_NOEXEC),
 761                     FLAG(MS_SYNCHRONOUS),
 762                     FLAG(MS_REMOUNT),
 763                     FLAG(MS_MANDLOCK),
 764                     FLAG(MS_DIRSYNC),
 765                     FLAG(MS_NOATIME),
 766                     FLAG(MS_NODIRATIME),
 767                     FLAG(MS_BIND),
 768                     FLAG(MS_MOVE),
 769                     FLAG(MS_REC),
 770                     FLAG(MS_SILENT),
 771                     FLAG(MS_POSIXACL),
 772                     FLAG(MS_UNBINDABLE),
 773                     FLAG(MS_PRIVATE),
 774                     FLAG(MS_SLAVE),
 775                     FLAG(MS_SHARED),
 776                     FLAG(MS_RELATIME),
 777                     FLAG(MS_KERNMOUNT),
 778                     FLAG(MS_I_VERSION),
 779                     FLAG(MS_STRICTATIME),
 780                     FLAG(MS_LAZYTIME),
 781                     y);
 782         if (!x)
 783                 return NULL;
 784         if (!y)
 785                 x[strlen(x) - 1] = '\0'; /* truncate the last | */
 786         return x;
 787 }
 788
 789 int mount_verbose(
 790                 int error_log_level,
 791                 const char *what,
 792                 const char *where,
 793                 const char *type,
 794                 unsigned long flags,
 795                 const char *options) {
 796
 797         _cleanup_free_ char *fl = NULL;
 798
 799         fl = mount_flags_to_string(flags);
 800
 801         if ((flags & MS_REMOUNT) && !what && !type)
 802                 log_debug("Remounting %s (%s \"%s\")...",
 803                           where, strnull(fl), strempty(options));
 804         else if (!what && !type)
 805                 log_debug("Mounting %s (%s \"%s\")...",
 806                           where, strnull(fl), strempty(options));
 807         else if ((flags & MS_BIND) && !type)
 808                 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
 809                           what, where, strnull(fl), strempty(options));
 810         else if (flags & MS_MOVE)
 811                 log_debug("Moving mount %s → %s (%s \"%s\")...",
 812                           what, where, strnull(fl), strempty(options));
 813         else
 814                 log_debug("Mounting %s on %s (%s \"%s\")...",
 815                           strna(type), where, strnull(fl), strempty(options));
 816         if (mount(what, where, type, flags, options) < 0)
 817                 return log_full_errno(error_log_level, errno,
 818                                       "Failed to mount %s on %s (%s \"%s\"): %m",
 819                                       strna(type), where, strnull(fl), strempty(options));
 820         return 0;
 821 }
 822
 823 int umount_verbose(const char *what) {
 824         log_debug("Umounting %s...", what);
 825         if (umount(what) < 0)
 826                 return log_error_errno(errno, "Failed to unmount %s: %m", what);
 827         return 0;
 828 }
 829
 830 const char *mount_propagation_flags_to_string(unsigned long flags) {
 831
 832         switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) {
 833         case 0:
 834                 return "";
 835         case MS_SHARED:
 836                 return "shared";
 837         case MS_SLAVE:
 838                 return "slave";
 839         case MS_PRIVATE:
 840                 return "private";
 841         }
 842
 843         return NULL;
 844 }
 845
 846
 847 int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
 848
 849         if (isempty(name))
 850                 *ret = 0;
 851         else if (streq(name, "shared"))
 852                 *ret = MS_SHARED;
 853         else if (streq(name, "slave"))
 854                 *ret = MS_SLAVE;
 855         else if (streq(name, "private"))
 856                 *ret = MS_PRIVATE;
 857         else
 858                 return -EINVAL;
 859         return 0;
 860 }