src/basic/mount-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #include <errno.h>
  22 #include <stdio_ext.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25 #include <sys/mount.h>
  26 #include <sys/stat.h>
  27 #include <sys/statvfs.h>
  28 #include <unistd.h>
  29
  30 /* Include later */
  31 #include <libmount.h>
  32
  33 #include "alloc-util.h"
  34 #include "escape.h"
  35 #include "extract-word.h"
  36 #include "fd-util.h"
  37 #include "fileio.h"
  38 #include "fs-util.h"
  39 #include "hashmap.h"
  40 #include "mount-util.h"
  41 #include "parse-util.h"
  42 #include "path-util.h"
  43 #include "set.h"
  44 #include "stdio-util.h"
  45 #include "string-util.h"
  46 #include "strv.h"
  47
  48 /* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
  49  * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
  50  * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
  51  * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
  52  * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
  53  * with large file handles anyway. */
  54 #define ORIGINAL_MAX_HANDLE_SZ 128
  55
  56 int name_to_handle_at_loop(
  57                 int fd,
  58                 const char *path,
  59                 struct file_handle **ret_handle,
  60                 int *ret_mnt_id,
  61                 int flags) {
  62
  63         _cleanup_free_ struct file_handle *h = NULL;
  64         size_t n = ORIGINAL_MAX_HANDLE_SZ;
  65
  66         /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
  67          * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
  68          * start value, it is not an upper bound on the buffer size required.
  69          *
  70          * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
  71          * as NULL if there's no interest in either. */
  72
  73         for (;;) {
  74                 int mnt_id = -1;
  75
  76                 h = malloc0(offsetof(struct file_handle, f_handle) + n);
  77                 if (!h)
  78                         return -ENOMEM;
  79
  80                 h->handle_bytes = n;
  81
  82                 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
  83
  84                         if (ret_handle)
  85                                 *ret_handle = TAKE_PTR(h);
  86
  87                         if (ret_mnt_id)
  88                                 *ret_mnt_id = mnt_id;
  89
  90                         return 0;
  91                 }
  92                 if (errno != EOVERFLOW)
  93                         return -errno;
  94
  95                 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
  96
  97                         /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
  98                          * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
  99                          * be filled in, and the caller was interested in only the mount ID an nothing else. */
 100
 101                         *ret_mnt_id = mnt_id;
 102                         return 0;
 103                 }
 104
 105                 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
 106                  * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
 107                  * buffer. In that case propagate EOVERFLOW */
 108                 if (h->handle_bytes <= n)
 109                         return -EOVERFLOW;
 110
 111                 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
 112                 n = h->handle_bytes;
 113                 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
 114                         return -EOVERFLOW;
 115
 116                 h = mfree(h);
 117         }
 118 }
 119
 120 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
 121         char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
 122         _cleanup_free_ char *fdinfo = NULL;
 123         _cleanup_close_ int subfd = -1;
 124         char *p;
 125         int r;
 126
 127         if ((flags & AT_EMPTY_PATH) && isempty(filename))
 128                 xsprintf(path, "/proc/self/fdinfo/%i", fd);
 129         else {
 130                 subfd = openat(fd, filename, O_CLOEXEC|O_PATH);
 131                 if (subfd < 0)
 132                         return -errno;
 133
 134                 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
 135         }
 136
 137         r = read_full_file(path, &fdinfo, NULL);
 138         if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
 139                 return -EOPNOTSUPP;
 140         if (r < 0)
 141                 return r;
 142
 143         p = startswith(fdinfo, "mnt_id:");
 144         if (!p) {
 145                 p = strstr(fdinfo, "\nmnt_id:");
 146                 if (!p) /* The mnt_id field is a relatively new addition */
 147                         return -EOPNOTSUPP;
 148
 149                 p += 8;
 150         }
 151
 152         p += strspn(p, WHITESPACE);
 153         p[strcspn(p, WHITESPACE)] = 0;
 154
 155         return safe_atoi(p, mnt_id);
 156 }
 157
 158 int fd_is_mount_point(int fd, const char *filename, int flags) {
 159         _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
 160         int mount_id = -1, mount_id_parent = -1;
 161         bool nosupp = false, check_st_dev = true;
 162         struct stat a, b;
 163         int r;
 164
 165         assert(fd >= 0);
 166         assert(filename);
 167
 168         /* First we will try the name_to_handle_at() syscall, which
 169          * tells us the mount id and an opaque file "handle". It is
 170          * not supported everywhere though (kernel compile-time
 171          * option, not all file systems are hooked up). If it works
 172          * the mount id is usually good enough to tell us whether
 173          * something is a mount point.
 174          *
 175          * If that didn't work we will try to read the mount id from
 176          * /proc/self/fdinfo/<fd>. This is almost as good as
 177          * name_to_handle_at(), however, does not return the
 178          * opaque file handle. The opaque file handle is pretty useful
 179          * to detect the root directory, which we should always
 180          * consider a mount point. Hence we use this only as
 181          * fallback. Exporting the mnt_id in fdinfo is a pretty recent
 182          * kernel addition.
 183          *
 184          * As last fallback we do traditional fstat() based st_dev
 185          * comparisons. This is how things were traditionally done,
 186          * but unionfs breaks this since it exposes file
 187          * systems with a variety of st_dev reported. Also, btrfs
 188          * subvolumes have different st_dev, even though they aren't
 189          * real mounts of their own. */
 190
 191         r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
 192         if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
 193                 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
 194                  * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
 195                  * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
 196                  * (EINVAL): fall back to simpler logic. */
 197                 goto fallback_fdinfo;
 198         else if (r == -EOPNOTSUPP)
 199                 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
 200                  * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
 201                  * logic */
 202                 nosupp = true;
 203         else if (r < 0)
 204                 return r;
 205
 206         r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
 207         if (r == -EOPNOTSUPP) {
 208                 if (nosupp)
 209                         /* Neither parent nor child do name_to_handle_at()?  We have no choice but to fall back. */
 210                         goto fallback_fdinfo;
 211                 else
 212                         /* The parent can't do name_to_handle_at() but the directory we are interested in can?  If so,
 213                          * it must be a mount point. */
 214                         return 1;
 215         } else if (r < 0)
 216                 return r;
 217
 218         /* The parent can do name_to_handle_at() but the
 219          * directory we are interested in can't? If so, it
 220          * must be a mount point. */
 221         if (nosupp)
 222                 return 1;
 223
 224         /* If the file handle for the directory we are
 225          * interested in and its parent are identical, we
 226          * assume this is the root directory, which is a mount
 227          * point. */
 228
 229         if (h->handle_bytes == h_parent->handle_bytes &&
 230             h->handle_type == h_parent->handle_type &&
 231             memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
 232                 return 1;
 233
 234         return mount_id != mount_id_parent;
 235
 236 fallback_fdinfo:
 237         r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
 238         if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
 239                 goto fallback_fstat;
 240         if (r < 0)
 241                 return r;
 242
 243         r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
 244         if (r < 0)
 245                 return r;
 246
 247         if (mount_id != mount_id_parent)
 248                 return 1;
 249
 250         /* Hmm, so, the mount ids are the same. This leaves one
 251          * special case though for the root file system. For that,
 252          * let's see if the parent directory has the same inode as we
 253          * are interested in. Hence, let's also do fstat() checks now,
 254          * too, but avoid the st_dev comparisons, since they aren't
 255          * that useful on unionfs mounts. */
 256         check_st_dev = false;
 257
 258 fallback_fstat:
 259         /* yay for fstatat() taking a different set of flags than the other
 260          * _at() above */
 261         if (flags & AT_SYMLINK_FOLLOW)
 262                 flags &= ~AT_SYMLINK_FOLLOW;
 263         else
 264                 flags |= AT_SYMLINK_NOFOLLOW;
 265         if (fstatat(fd, filename, &a, flags) < 0)
 266                 return -errno;
 267
 268         if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
 269                 return -errno;
 270
 271         /* A directory with same device and inode as its parent? Must
 272          * be the root directory */
 273         if (a.st_dev == b.st_dev &&
 274             a.st_ino == b.st_ino)
 275                 return 1;
 276
 277         return check_st_dev && (a.st_dev != b.st_dev);
 278 }
 279
 280 /* flags can be AT_SYMLINK_FOLLOW or 0 */
 281 int path_is_mount_point(const char *t, const char *root, int flags) {
 282         _cleanup_free_ char *canonical = NULL, *parent = NULL;
 283         _cleanup_close_ int fd = -1;
 284         int r;
 285
 286         assert(t);
 287         assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
 288
 289         if (path_equal(t, "/"))
 290                 return 1;
 291
 292         /* we need to resolve symlinks manually, we can't just rely on
 293          * fd_is_mount_point() to do that for us; if we have a structure like
 294          * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
 295          * look at needs to be /usr, not /. */
 296         if (flags & AT_SYMLINK_FOLLOW) {
 297                 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
 298                 if (r < 0)
 299                         return r;
 300
 301                 t = canonical;
 302         }
 303
 304         parent = dirname_malloc(t);
 305         if (!parent)
 306                 return -ENOMEM;
 307
 308         fd = openat(AT_FDCWD, parent, O_DIRECTORY|O_CLOEXEC|O_PATH);
 309         if (fd < 0)
 310                 return -errno;
 311
 312         return fd_is_mount_point(fd, last_path_component(t), flags);
 313 }
 314
 315 int path_get_mnt_id(const char *path, int *ret) {
 316         int r;
 317
 318         r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
 319         if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
 320                 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
 321
 322         return r;
 323 }
 324
 325 int umount_recursive(const char *prefix, int flags) {
 326         bool again;
 327         int n = 0, r;
 328
 329         /* Try to umount everything recursively below a
 330          * directory. Also, take care of stacked mounts, and keep
 331          * unmounting them until they are gone. */
 332
 333         do {
 334                 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 335
 336                 again = false;
 337                 r = 0;
 338
 339                 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 340                 if (!proc_self_mountinfo)
 341                         return -errno;
 342
 343                 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
 344
 345                 for (;;) {
 346                         _cleanup_free_ char *path = NULL, *p = NULL;
 347                         int k;
 348
 349                         k = fscanf(proc_self_mountinfo,
 350                                    "%*s "       /* (1) mount id */
 351                                    "%*s "       /* (2) parent id */
 352                                    "%*s "       /* (3) major:minor */
 353                                    "%*s "       /* (4) root */
 354                                    "%ms "       /* (5) mount point */
 355                                    "%*s"        /* (6) mount options */
 356                                    "%*[^-]"     /* (7) optional fields */
 357                                    "- "         /* (8) separator */
 358                                    "%*s "       /* (9) file system type */
 359                                    "%*s"        /* (10) mount source */
 360                                    "%*s"        /* (11) mount options 2 */
 361                                    "%*[^\n]",   /* some rubbish at the end */
 362                                    &path);
 363                         if (k != 1) {
 364                                 if (k == EOF)
 365                                         break;
 366
 367                                 continue;
 368                         }
 369
 370                         r = cunescape(path, UNESCAPE_RELAX, &p);
 371                         if (r < 0)
 372                                 return r;
 373
 374                         if (!path_startswith(p, prefix))
 375                                 continue;
 376
 377                         if (umount2(p, flags) < 0) {
 378                                 r = log_debug_errno(errno, "Failed to umount %s: %m", p);
 379                                 continue;
 380                         }
 381
 382                         log_debug("Successfully unmounted %s", p);
 383
 384                         again = true;
 385                         n++;
 386
 387                         break;
 388                 }
 389
 390         } while (again);
 391
 392         return r ? r : n;
 393 }
 394
 395 static int get_mount_flags(const char *path, unsigned long *flags) {
 396         struct statvfs buf;
 397
 398         if (statvfs(path, &buf) < 0)
 399                 return -errno;
 400         *flags = buf.f_flag;
 401         return 0;
 402 }
 403
/* Use this function only if you do not have direct access to /proc/self/mountinfo
 * and need the caller to open it for you. This is the case when /proc is
 * masked or not mounted. Otherwise, use bind_remount_recursive. */
 407 int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) {
 408         _cleanup_set_free_free_ Set *done = NULL;
 409         _cleanup_free_ char *cleaned = NULL;
 410         int r;
 411
 412         assert(proc_self_mountinfo);
 413
 414         /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
 415          * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
 416          * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
 417          * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
 418          * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
 419          * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
 420          * future submounts that have been triggered via autofs.
 421          *
 422          * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
 423          * remount operation. Note that we'll ignore the blacklist for the top-level path. */
 424
 425         cleaned = strdup(prefix);
 426         if (!cleaned)
 427                 return -ENOMEM;
 428
 429         path_kill_slashes(cleaned);
 430
 431         done = set_new(&path_hash_ops);
 432         if (!done)
 433                 return -ENOMEM;
 434
 435         for (;;) {
 436                 _cleanup_set_free_free_ Set *todo = NULL;
 437                 bool top_autofs = false;
 438                 char *x;
 439                 unsigned long orig_flags;
 440
 441                 todo = set_new(&path_hash_ops);
 442                 if (!todo)
 443                         return -ENOMEM;
 444
 445                 rewind(proc_self_mountinfo);
 446
 447                 for (;;) {
 448                         _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
 449                         int k;
 450
 451                         k = fscanf(proc_self_mountinfo,
 452                                    "%*s "       /* (1) mount id */
 453                                    "%*s "       /* (2) parent id */
 454                                    "%*s "       /* (3) major:minor */
 455                                    "%*s "       /* (4) root */
 456                                    "%ms "       /* (5) mount point */
 457                                    "%*s"        /* (6) mount options (superblock) */
 458                                    "%*[^-]"     /* (7) optional fields */
 459                                    "- "         /* (8) separator */
 460                                    "%ms "       /* (9) file system type */
 461                                    "%*s"        /* (10) mount source */
 462                                    "%*s"        /* (11) mount options (bind mount) */
 463                                    "%*[^\n]",   /* some rubbish at the end */
 464                                    &path,
 465                                    &type);
 466                         if (k != 2) {
 467                                 if (k == EOF)
 468                                         break;
 469
 470                                 continue;
 471                         }
 472
 473                         r = cunescape(path, UNESCAPE_RELAX, &p);
 474                         if (r < 0)
 475                                 return r;
 476
 477                         if (!path_startswith(p, cleaned))
 478                                 continue;
 479
 480                         /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
 481                          * operate on. */
 482                         if (!path_equal(cleaned, p)) {
 483                                 bool blacklisted = false;
 484                                 char **i;
 485
 486                                 STRV_FOREACH(i, blacklist) {
 487
 488                                         if (path_equal(*i, cleaned))
 489                                                 continue;
 490
 491                                         if (!path_startswith(*i, cleaned))
 492                                                 continue;
 493
 494                                         if (path_startswith(p, *i)) {
 495                                                 blacklisted = true;
 496                                                 log_debug("Not remounting %s, because blacklisted by %s, called for %s", p, *i, cleaned);
 497                                                 break;
 498                                         }
 499                                 }
 500                                 if (blacklisted)
 501                                         continue;
 502                         }
 503
 504                         /* Let's ignore autofs mounts.  If they aren't
 505                          * triggered yet, we want to avoid triggering
 506                          * them, as we don't make any guarantees for
 507                          * future submounts anyway.  If they are
 508                          * already triggered, then we will find
 509                          * another entry for this. */
 510                         if (streq(type, "autofs")) {
 511                                 top_autofs = top_autofs || path_equal(cleaned, p);
 512                                 continue;
 513                         }
 514
 515                         if (!set_contains(done, p)) {
 516                                 r = set_consume(todo, p);
 517                                 p = NULL;
 518                                 if (r == -EEXIST)
 519                                         continue;
 520                                 if (r < 0)
 521                                         return r;
 522                         }
 523                 }
 524
 525                 /* If we have no submounts to process anymore and if
 526                  * the root is either already done, or an autofs, we
 527                  * are done */
 528                 if (set_isempty(todo) &&
 529                     (top_autofs || set_contains(done, cleaned)))
 530                         return 0;
 531
 532                 if (!set_contains(done, cleaned) &&
 533                     !set_contains(todo, cleaned)) {
 534                         /* The prefix directory itself is not yet a mount, make it one. */
 535                         if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
 536                                 return -errno;
 537
 538                         orig_flags = 0;
 539                         (void) get_mount_flags(cleaned, &orig_flags);
 540                         orig_flags &= ~MS_RDONLY;
 541
 542                         if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 543                                 return -errno;
 544
 545                         log_debug("Made top-level directory %s a mount point.", prefix);
 546
 547                         x = strdup(cleaned);
 548                         if (!x)
 549                                 return -ENOMEM;
 550
 551                         r = set_consume(done, x);
 552                         if (r < 0)
 553                                 return r;
 554                 }
 555
 556                 while ((x = set_steal_first(todo))) {
 557
 558                         r = set_consume(done, x);
 559                         if (IN_SET(r, 0, -EEXIST))
 560                                 continue;
 561                         if (r < 0)
 562                                 return r;
 563
 564                         /* Deal with mount points that are obstructed by a later mount */
 565                         r = path_is_mount_point(x, NULL, 0);
 566                         if (IN_SET(r, 0, -ENOENT))
 567                                 continue;
 568                         if (r < 0)
 569                                 return r;
 570
 571                         /* Try to reuse the original flag set */
 572                         orig_flags = 0;
 573                         (void) get_mount_flags(x, &orig_flags);
 574                         orig_flags &= ~MS_RDONLY;
 575
 576                         if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 577                                 return -errno;
 578
 579                         log_debug("Remounted %s read-only.", x);
 580                 }
 581         }
 582 }
 583
 584 int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
 585         _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 586
 587         proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 588         if (!proc_self_mountinfo)
 589                 return -errno;
 590
 591         (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
 592
 593         return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, proc_self_mountinfo);
 594 }
 595
 596 int mount_move_root(const char *path) {
 597         assert(path);
 598
 599         if (chdir(path) < 0)
 600                 return -errno;
 601
 602         if (mount(path, "/", NULL, MS_MOVE, NULL) < 0)
 603                 return -errno;
 604
 605         if (chroot(".") < 0)
 606                 return -errno;
 607
 608         if (chdir("/") < 0)
 609                 return -errno;
 610
 611         return 0;
 612 }
 613
 614 bool fstype_is_network(const char *fstype) {
 615         const char *x;
 616
 617         x = startswith(fstype, "fuse.");
 618         if (x)
 619                 fstype = x;
 620
 621         return STR_IN_SET(fstype,
 622                           "afs",
 623                           "cifs",
 624                           "smbfs",
 625                           "sshfs",
 626                           "ncpfs",
 627                           "ncp",
 628                           "nfs",
 629                           "nfs4",
 630                           "gfs",
 631                           "gfs2",
 632                           "glusterfs",
 633                           "pvfs2", /* OrangeFS */
 634                           "ocfs2",
 635                           "lustre");
 636 }
 637
 638 bool fstype_is_api_vfs(const char *fstype) {
 639         return STR_IN_SET(fstype,
 640                           "autofs",
 641                           "bpf",
 642                           "cgroup",
 643                           "cgroup2",
 644                           "configfs",
 645                           "cpuset",
 646                           "debugfs",
 647                           "devpts",
 648                           "devtmpfs",
 649                           "efivarfs",
 650                           "fusectl",
 651                           "hugetlbfs",
 652                           "mqueue",
 653                           "proc",
 654                           "pstore",
 655                           "ramfs",
 656                           "securityfs",
 657                           "sysfs",
 658                           "tmpfs",
 659                           "tracefs");
 660 }
 661
 662 bool fstype_is_ro(const char *fstype) {
 663         /* All Linux file systems that are necessarily read-only */
 664         return STR_IN_SET(fstype,
 665                           "DM_verity_hash",
 666                           "iso9660",
 667                           "squashfs");
 668 }
 669
 670 bool fstype_can_discard(const char *fstype) {
 671         return STR_IN_SET(fstype,
 672                           "btrfs",
 673                           "ext4",
 674                           "vfat",
 675                           "xfs");
 676 }
 677
 678 bool fstype_can_uid_gid(const char *fstype) {
 679
 680         /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
 681          * current and future. */
 682
 683         return STR_IN_SET(fstype,
 684                           "adfs",
 685                           "fat",
 686                           "hfs",
 687                           "hpfs",
 688                           "iso9660",
 689                           "msdos",
 690                           "ntfs",
 691                           "vfat");
 692 }
 693
 694 int repeat_unmount(const char *path, int flags) {
 695         bool done = false;
 696
 697         assert(path);
 698
 699         /* If there are multiple mounts on a mount point, this
 700          * removes them all */
 701
 702         for (;;) {
 703                 if (umount2(path, flags) < 0) {
 704
 705                         if (errno == EINVAL)
 706                                 return done;
 707
 708                         return -errno;
 709                 }
 710
 711                 done = true;
 712         }
 713 }
 714
 715 const char* mode_to_inaccessible_node(mode_t mode) {
 716         /* This function maps a node type to the correspondent inaccessible node type.
 717          * Character and block inaccessible devices may not be created (because major=0 and minor=0),
 718          * in such case we map character and block devices to the inaccessible node type socket. */
 719         switch(mode & S_IFMT) {
 720                 case S_IFREG:
 721                         return "/run/systemd/inaccessible/reg";
 722                 case S_IFDIR:
 723                         return "/run/systemd/inaccessible/dir";
 724                 case S_IFCHR:
 725                         if (access("/run/systemd/inaccessible/chr", F_OK) == 0)
 726                                 return "/run/systemd/inaccessible/chr";
 727                         return "/run/systemd/inaccessible/sock";
 728                 case S_IFBLK:
 729                         if (access("/run/systemd/inaccessible/blk", F_OK) == 0)
 730                                 return "/run/systemd/inaccessible/blk";
 731                         return "/run/systemd/inaccessible/sock";
 732                 case S_IFIFO:
 733                         return "/run/systemd/inaccessible/fifo";
 734                 case S_IFSOCK:
 735                         return "/run/systemd/inaccessible/sock";
 736         }
 737         return NULL;
 738 }
 739
 740 #define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
 741 static char* mount_flags_to_string(long unsigned flags) {
 742         char *x;
 743         _cleanup_free_ char *y = NULL;
 744         long unsigned overflow;
 745
 746         overflow = flags & ~(MS_RDONLY |
 747                              MS_NOSUID |
 748                              MS_NODEV |
 749                              MS_NOEXEC |
 750                              MS_SYNCHRONOUS |
 751                              MS_REMOUNT |
 752                              MS_MANDLOCK |
 753                              MS_DIRSYNC |
 754                              MS_NOATIME |
 755                              MS_NODIRATIME |
 756                              MS_BIND |
 757                              MS_MOVE |
 758                              MS_REC |
 759                              MS_SILENT |
 760                              MS_POSIXACL |
 761                              MS_UNBINDABLE |
 762                              MS_PRIVATE |
 763                              MS_SLAVE |
 764                              MS_SHARED |
 765                              MS_RELATIME |
 766                              MS_KERNMOUNT |
 767                              MS_I_VERSION |
 768                              MS_STRICTATIME |
 769                              MS_LAZYTIME);
 770
 771         if (flags == 0 || overflow != 0)
 772                 if (asprintf(&y, "%lx", overflow) < 0)
 773                         return NULL;
 774
 775         x = strjoin(FLAG(MS_RDONLY),
 776                     FLAG(MS_NOSUID),
 777                     FLAG(MS_NODEV),
 778                     FLAG(MS_NOEXEC),
 779                     FLAG(MS_SYNCHRONOUS),
 780                     FLAG(MS_REMOUNT),
 781                     FLAG(MS_MANDLOCK),
 782                     FLAG(MS_DIRSYNC),
 783                     FLAG(MS_NOATIME),
 784                     FLAG(MS_NODIRATIME),
 785                     FLAG(MS_BIND),
 786                     FLAG(MS_MOVE),
 787                     FLAG(MS_REC),
 788                     FLAG(MS_SILENT),
 789                     FLAG(MS_POSIXACL),
 790                     FLAG(MS_UNBINDABLE),
 791                     FLAG(MS_PRIVATE),
 792                     FLAG(MS_SLAVE),
 793                     FLAG(MS_SHARED),
 794                     FLAG(MS_RELATIME),
 795                     FLAG(MS_KERNMOUNT),
 796                     FLAG(MS_I_VERSION),
 797                     FLAG(MS_STRICTATIME),
 798                     FLAG(MS_LAZYTIME),
 799                     y);
 800         if (!x)
 801                 return NULL;
 802         if (!y)
 803                 x[strlen(x) - 1] = '\0'; /* truncate the last | */
 804         return x;
 805 }
 806
 807 int mount_verbose(
 808                 int error_log_level,
 809                 const char *what,
 810                 const char *where,
 811                 const char *type,
 812                 unsigned long flags,
 813                 const char *options) {
 814
 815         _cleanup_free_ char *fl = NULL, *o = NULL;
 816         unsigned long f;
 817         int r;
 818
 819         r = mount_option_mangle(options, flags, &f, &o);
 820         if (r < 0)
 821                 return log_full_errno(error_log_level, r,
 822                                       "Failed to mangle mount options %s: %m",
 823                                       strempty(options));
 824
 825         fl = mount_flags_to_string(f);
 826
 827         if ((f & MS_REMOUNT) && !what && !type)
 828                 log_debug("Remounting %s (%s \"%s\")...",
 829                           where, strnull(fl), strempty(o));
 830         else if (!what && !type)
 831                 log_debug("Mounting %s (%s \"%s\")...",
 832                           where, strnull(fl), strempty(o));
 833         else if ((f & MS_BIND) && !type)
 834                 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
 835                           what, where, strnull(fl), strempty(o));
 836         else if (f & MS_MOVE)
 837                 log_debug("Moving mount %s → %s (%s \"%s\")...",
 838                           what, where, strnull(fl), strempty(o));
 839         else
 840                 log_debug("Mounting %s on %s (%s \"%s\")...",
 841                           strna(type), where, strnull(fl), strempty(o));
 842         if (mount(what, where, type, f, o) < 0)
 843                 return log_full_errno(error_log_level, errno,
 844                                       "Failed to mount %s on %s (%s \"%s\"): %m",
 845                                       strna(type), where, strnull(fl), strempty(o));
 846         return 0;
 847 }
 848
 849 int umount_verbose(const char *what) {
 850         log_debug("Umounting %s...", what);
 851         if (umount(what) < 0)
 852                 return log_error_errno(errno, "Failed to unmount %s: %m", what);
 853         return 0;
 854 }
 855
 856 const char *mount_propagation_flags_to_string(unsigned long flags) {
 857
 858         switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) {
 859         case 0:
 860                 return "";
 861         case MS_SHARED:
 862                 return "shared";
 863         case MS_SLAVE:
 864                 return "slave";
 865         case MS_PRIVATE:
 866                 return "private";
 867         }
 868
 869         return NULL;
 870 }
 871
 872
 873 int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
 874
 875         if (isempty(name))
 876                 *ret = 0;
 877         else if (streq(name, "shared"))
 878                 *ret = MS_SHARED;
 879         else if (streq(name, "slave"))
 880                 *ret = MS_SLAVE;
 881         else if (streq(name, "private"))
 882                 *ret = MS_PRIVATE;
 883         else
 884                 return -EINVAL;
 885         return 0;
 886 }
 887
 888 int mount_option_mangle(
 889                 const char *options,
 890                 unsigned long mount_flags,
 891                 unsigned long *ret_mount_flags,
 892                 char **ret_remaining_options) {
 893
 894         const struct libmnt_optmap *map;
 895         _cleanup_free_ char *ret = NULL;
 896         const char *p;
 897         int r;
 898
 899         /* This extracts mount flags from the mount options, and store
 900          * non-mount-flag options to '*ret_remaining_options'.
 901          * E.g.,
 902          * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
 903          * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
 904          * "size=1630748k,mode=700,uid=1000,gid=1000".
 905          * See more examples in test-mount-utils.c.
 906          *
 907          * Note that if 'options' does not contain any non-mount-flag options,
 908          * then '*ret_remaining_options' is set to NULL instread of empty string.
 909          * Note that this does not check validity of options stored in
 910          * '*ret_remaining_options'.
 911          * Note that if 'options' is NULL, then this just copies 'mount_flags'
 912          * to '*ret_mount_flags'. */
 913
 914         assert(ret_mount_flags);
 915         assert(ret_remaining_options);
 916
 917         map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
 918         if (!map)
 919                 return -EINVAL;
 920
 921         p = options;
 922         for (;;) {
 923                 _cleanup_free_ char *word = NULL;
 924                 const struct libmnt_optmap *ent;
 925
 926                 r = extract_first_word(&p, &word, ",", EXTRACT_QUOTES);
 927                 if (r < 0)
 928                         return r;
 929                 if (r == 0)
 930                         break;
 931
 932                 for (ent = map; ent->name; ent++) {
 933                         /* All entries in MNT_LINUX_MAP do not take any argument.
 934                          * Thus, ent->name does not contain "=" or "[=]". */
 935                         if (!streq(word, ent->name))
 936                                 continue;
 937
 938                         if (!(ent->mask & MNT_INVERT))
 939                                 mount_flags |= ent->id;
 940                         else if (mount_flags & ent->id)
 941                                 mount_flags ^= ent->id;
 942
 943                         break;
 944                 }
 945
 946                 /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
 947                 if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
 948                         return -ENOMEM;
 949         }
 950
 951         *ret_mount_flags = mount_flags;
 952         *ret_remaining_options = TAKE_PTR(ret);
 953
 954         return 0;
 955 }