src/basic/mountpoint-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #include <errno.h>
   4 #include <fcntl.h>
   5 #include <sys/mount.h>
   6 #if WANT_LINUX_FS_H
   7 #include <linux/fs.h>
   8 #endif
   9
  10 #include "alloc-util.h"
  11 #include "chase.h"
  12 #include "fd-util.h"
  13 #include "fileio.h"
  14 #include "filesystems.h"
  15 #include "fs-util.h"
  16 #include "missing_fs.h"
  17 #include "missing_mount.h"
  18 #include "missing_stat.h"
  19 #include "missing_syscall.h"
  20 #include "mkdir.h"
  21 #include "mountpoint-util.h"
  22 #include "nulstr-util.h"
  23 #include "parse-util.h"
  24 #include "path-util.h"
  25 #include "stat-util.h"
  26 #include "stdio-util.h"
  27 #include "strv.h"
  28 #include "user-util.h"
  29
/* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
 * whatever value the current API headers define, to future-proof things: if the size is increased in the API headers,
 * and our code is recompiled, then it would cease working on old kernels, as those refuse any sizes larger than this
 * value with EINVAL right away. Hence, let's disconnect ourselves from any such API changes, and stick to the original
 * definition from when it was introduced. We use it as a start value only (see below), and hence are able to deal
 * with larger file handles regardless. */
#define ORIGINAL_MAX_HANDLE_SZ 128
  37
  38 int name_to_handle_at_loop(
  39                 int fd,
  40                 const char *path,
  41                 struct file_handle **ret_handle,
  42                 int *ret_mnt_id,
  43                 int flags) {
  44
  45         size_t n = ORIGINAL_MAX_HANDLE_SZ;
  46
  47         assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
  48
  49         /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
  50          * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
  51          * start value, it is not an upper bound on the buffer size required.
  52          *
  53          * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
  54          * as NULL if there's no interest in either. */
  55
  56         for (;;) {
  57                 _cleanup_free_ struct file_handle *h = NULL;
  58                 int mnt_id = -1;
  59
  60                 h = malloc0(offsetof(struct file_handle, f_handle) + n);
  61                 if (!h)
  62                         return -ENOMEM;
  63
  64                 h->handle_bytes = n;
  65
  66                 if (name_to_handle_at(fd, strempty(path), h, &mnt_id, flags) >= 0) {
  67
  68                         if (ret_handle)
  69                                 *ret_handle = TAKE_PTR(h);
  70
  71                         if (ret_mnt_id)
  72                                 *ret_mnt_id = mnt_id;
  73
  74                         return 0;
  75                 }
  76                 if (errno != EOVERFLOW)
  77                         return -errno;
  78
  79                 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
  80
  81                         /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
  82                          * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
  83                          * be filled in, and the caller was interested in only the mount ID an nothing else. */
  84
  85                         *ret_mnt_id = mnt_id;
  86                         return 0;
  87                 }
  88
  89                 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
  90                  * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
  91                  * buffer. In that case propagate EOVERFLOW */
  92                 if (h->handle_bytes <= n)
  93                         return -EOVERFLOW;
  94
  95                 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
  96                 n = h->handle_bytes;
  97
  98                 /* paranoia: check for overflow (note that .handle_bytes is unsigned only) */
  99                 if (n > UINT_MAX - offsetof(struct file_handle, f_handle))
 100                         return -EOVERFLOW;
 101         }
 102 }
 103
 104 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *ret_mnt_id) {
 105         char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
 106         _cleanup_free_ char *fdinfo = NULL;
 107         _cleanup_close_ int subfd = -EBADF;
 108         char *p;
 109         int r;
 110
 111         assert(ret_mnt_id);
 112         assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
 113
 114         if ((flags & AT_EMPTY_PATH) && isempty(filename))
 115                 xsprintf(path, "/proc/self/fdinfo/%i", fd);
 116         else {
 117                 subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
 118                 if (subfd < 0)
 119                         return -errno;
 120
 121                 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
 122         }
 123
 124         r = read_full_virtual_file(path, &fdinfo, NULL);
 125         if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
 126                 return proc_mounted() > 0 ? -EOPNOTSUPP : -ENOSYS;
 127         if (r < 0)
 128                 return r;
 129
 130         p = find_line_startswith(fdinfo, "mnt_id:");
 131         if (!p) /* The mnt_id field is a relatively new addition */
 132                 return -EOPNOTSUPP;
 133
 134         p += strspn(p, WHITESPACE);
 135         p[strcspn(p, WHITESPACE)] = 0;
 136
 137         return safe_atoi(p, ret_mnt_id);
 138 }
 139
 140 static bool filename_possibly_with_slash_suffix(const char *s) {
 141         const char *slash, *copied;
 142
 143         /* Checks whether the specified string is either file name, or a filename with a suffix of
 144          * slashes. But nothing else.
 145          *
 146          * this is OK: foo, bar, foo/, bar/, foo//, bar///
 147          * this is not OK: "", "/", "/foo", "foo/bar", ".", ".." … */
 148
 149         slash = strchr(s, '/');
 150         if (!slash)
 151                 return filename_is_valid(s);
 152
 153         if (slash - s > PATH_MAX) /* We want to allocate on the stack below, hence do a size check first */
 154                 return false;
 155
 156         if (slash[strspn(slash, "/")] != 0) /* Check that the suffix consist only of one or more slashes */
 157                 return false;
 158
 159         copied = strndupa_safe(s, slash - s);
 160         return filename_is_valid(copied);
 161 }
 162
 163 static bool is_name_to_handle_at_fatal_error(int err) {
 164         /* name_to_handle_at() can return "acceptable" errors that are due to the context. For
 165          * example the kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall
 166          * was blocked (EACCES/EPERM; maybe through seccomp, because we are running inside of a
 167          * container), or the mount point is not triggered yet (EOVERFLOW, think nfs4), or some
 168          * general name_to_handle_at() flakiness (EINVAL). However other errors are not supposed to
 169          * happen and therefore are considered fatal ones. */
 170
 171         assert(err < 0);
 172
 173         return !IN_SET(err, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL);
 174 }
 175
 176 int fd_is_mount_point(int fd, const char *filename, int flags) {
 177         _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
 178         int mount_id = -1, mount_id_parent = -1;
 179         bool nosupp = false, check_st_dev = true;
 180         STRUCT_STATX_DEFINE(sx);
 181         struct stat a, b;
 182         int r;
 183
 184         assert(fd >= 0);
 185         assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
 186
 187         if (!filename) {
 188                 /* If the file name is specified as NULL we'll see if the specified 'fd' is a mount
 189                  * point. That's only supported if the kernel supports statx(), or if the inode specified via
 190                  * 'fd' refers to a directory. Otherwise, we'll have to fail (ENOTDIR), because we have no
 191                  * kernel API to query the information we need. */
 192                 flags |= AT_EMPTY_PATH;
 193                 filename = "";
 194         } else if (!filename_possibly_with_slash_suffix(filename))
 195                 /* Insist that the specified filename is actually a filename, and not a path, i.e. some inode further
 196                  * up or down the tree then immediately below the specified directory fd. */
 197                 return -EINVAL;
 198
 199         /* First we will try statx()' STATX_ATTR_MOUNT_ROOT attribute, which is our ideal API, available
 200          * since kernel 5.8.
 201          *
 202          * If that fails, our second try is the name_to_handle_at() syscall, which tells us the mount id and
 203          * an opaque file "handle". It is not supported everywhere though (kernel compile-time option, not
 204          * all file systems are hooked up). If it works the mount id is usually good enough to tell us
 205          * whether something is a mount point.
 206          *
 207          * If that didn't work we will try to read the mount id from /proc/self/fdinfo/<fd>. This is almost
 208          * as good as name_to_handle_at(), however, does not return the opaque file handle. The opaque file
 209          * handle is pretty useful to detect the root directory, which we should always consider a mount
 210          * point. Hence we use this only as fallback. Exporting the mnt_id in fdinfo is a pretty recent
 211          * kernel addition.
 212          *
 213          * As last fallback we do traditional fstat() based st_dev comparisons. This is how things were
 214          * traditionally done, but unionfs breaks this since it exposes file systems with a variety of st_dev
 215          * reported. Also, btrfs subvolumes have different st_dev, even though they aren't real mounts of
 216          * their own. */
 217
 218         if (statx(fd,
 219                   filename,
 220                   (FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : AT_SYMLINK_NOFOLLOW) |
 221                   (flags & AT_EMPTY_PATH) |
 222                   AT_NO_AUTOMOUNT |            /* don't trigger automounts – mounts are a local concept, hence no need to trigger automounts to determine STATX_ATTR_MOUNT_ROOT */
 223                   AT_STATX_DONT_SYNC,          /* don't go to the network for this – for similar reasons */
 224                   STATX_TYPE,
 225                   &sx) < 0) {
 226                 if (!ERRNO_IS_NOT_SUPPORTED(errno) && /* statx() is not supported by the kernel. */
 227                     !ERRNO_IS_PRIVILEGE(errno) &&     /* maybe filtered by seccomp. */
 228                     errno != EINVAL)                  /* glibc's fallback method returns EINVAL when AT_STATX_DONT_SYNC is set. */
 229                         return -errno;
 230
 231                 /* If statx() is not available or forbidden, fall back to name_to_handle_at() below */
 232         } else if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) /* yay! */
 233                 return FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
 234         else if (FLAGS_SET(sx.stx_mask, STATX_TYPE) && S_ISLNK(sx.stx_mode))
 235                 return false; /* symlinks are never mount points */
 236
 237         r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
 238         if (r < 0) {
 239                 if (is_name_to_handle_at_fatal_error(r))
 240                         return r;
 241                 if (r != -EOPNOTSUPP)
 242                         goto fallback_fdinfo;
 243
 244                 /* This kernel or file system does not support name_to_handle_at(), hence let's see
 245                  * if the upper fs supports it (in which case it is a mount point), otherwise fall
 246                  * back to the traditional stat() logic */
 247                 nosupp = true;
 248         }
 249
 250         if (isempty(filename))
 251                 r = name_to_handle_at_loop(fd, "..", &h_parent, &mount_id_parent, 0); /* can't work for non-directories 😢 */
 252         else
 253                 r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
 254         if (r < 0) {
 255                 if (is_name_to_handle_at_fatal_error(r))
 256                         return r;
 257                 if (r != -EOPNOTSUPP)
 258                         goto fallback_fdinfo;
 259                 if (nosupp)
 260                         /* Both the parent and the directory can't do name_to_handle_at() */
 261                         goto fallback_fdinfo;
 262
 263                 /* The parent can't do name_to_handle_at() but the directory we are
 264                  * interested in can?  If so, it must be a mount point. */
 265                 return 1;
 266         }
 267
 268         /* The parent can do name_to_handle_at() but the directory we are interested in can't? If
 269          * so, it must be a mount point. */
 270         if (nosupp)
 271                 return 1;
 272
 273         /* If the file handle for the directory we are interested in and its parent are identical,
 274          * we assume this is the root directory, which is a mount point. */
 275
 276         if (h->handle_type == h_parent->handle_type &&
 277             memcmp_nn(h->f_handle, h->handle_bytes,
 278                       h_parent->f_handle, h_parent->handle_bytes) == 0)
 279                 return 1;
 280
 281         return mount_id != mount_id_parent;
 282
 283 fallback_fdinfo:
 284         r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
 285         if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM, -ENOSYS))
 286                 goto fallback_fstat;
 287         if (r < 0)
 288                 return r;
 289
 290         if (isempty(filename))
 291                 r = fd_fdinfo_mnt_id(fd, "..", 0, &mount_id_parent); /* can't work for non-directories 😢 */
 292         else
 293                 r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
 294         if (r < 0)
 295                 return r;
 296
 297         if (mount_id != mount_id_parent)
 298                 return 1;
 299
 300         /* Hmm, so, the mount ids are the same. This leaves one special case though for the root file
 301          * system. For that, let's see if the parent directory has the same inode as we are interested
 302          * in. Hence, let's also do fstat() checks now, too, but avoid the st_dev comparisons, since they
 303          * aren't that useful on unionfs mounts. */
 304         check_st_dev = false;
 305
 306 fallback_fstat:
 307         /* yay for fstatat() taking a different set of flags than the other _at() above */
 308         if (flags & AT_SYMLINK_FOLLOW)
 309                 flags &= ~AT_SYMLINK_FOLLOW;
 310         else
 311                 flags |= AT_SYMLINK_NOFOLLOW;
 312         if (fstatat(fd, filename, &a, flags) < 0)
 313                 return -errno;
 314         if (S_ISLNK(a.st_mode)) /* Symlinks are never mount points */
 315                 return false;
 316
 317         if (isempty(filename))
 318                 r = fstatat(fd, "..", &b, 0);
 319         else
 320                 r = fstatat(fd, "", &b, AT_EMPTY_PATH);
 321         if (r < 0)
 322                 return -errno;
 323
 324         /* A directory with same device and inode as its parent? Must be the root directory */
 325         if (stat_inode_same(&a, &b))
 326                 return 1;
 327
 328         return check_st_dev && (a.st_dev != b.st_dev);
 329 }
 330
 331 /* flags can be AT_SYMLINK_FOLLOW or 0 */
 332 int path_is_mount_point(const char *t, const char *root, int flags) {
 333         _cleanup_free_ char *canonical = NULL;
 334         _cleanup_close_ int fd = -EBADF;
 335         int r;
 336
 337         assert(t);
 338         assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
 339
 340         if (path_equal(t, "/"))
 341                 return 1;
 342
 343         /* we need to resolve symlinks manually, we can't just rely on
 344          * fd_is_mount_point() to do that for us; if we have a structure like
 345          * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
 346          * look at needs to be /usr, not /. */
 347         if (flags & AT_SYMLINK_FOLLOW) {
 348                 r = chase(t, root, CHASE_TRAIL_SLASH, &canonical, NULL);
 349                 if (r < 0)
 350                         return r;
 351
 352                 t = canonical;
 353         }
 354
 355         fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
 356         if (fd < 0)
 357                 return fd;
 358
 359         return fd_is_mount_point(fd, last_path_component(t), flags);
 360 }
 361
 362 int path_get_mnt_id_at_fallback(int dir_fd, const char *path, int *ret) {
 363         int r;
 364
 365         assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
 366         assert(ret);
 367
 368         r = name_to_handle_at_loop(dir_fd, path, NULL, ret, isempty(path) ? AT_EMPTY_PATH : 0);
 369         if (r == 0 || is_name_to_handle_at_fatal_error(r))
 370                 return r;
 371
 372         return fd_fdinfo_mnt_id(dir_fd, path, isempty(path) ? AT_EMPTY_PATH : 0, ret);
 373 }
 374
 375 int path_get_mnt_id_at(int dir_fd, const char *path, int *ret) {
 376         STRUCT_NEW_STATX_DEFINE(buf);
 377
 378         assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
 379         assert(ret);
 380
 381         if (statx(dir_fd,
 382                   strempty(path),
 383                   (isempty(path) ? AT_EMPTY_PATH : AT_SYMLINK_NOFOLLOW) |
 384                   AT_NO_AUTOMOUNT |    /* don't trigger automounts, mnt_id is a local concept */
 385                   AT_STATX_DONT_SYNC,  /* don't go to the network, mnt_id is a local concept */
 386                   STATX_MNT_ID,
 387                   &buf.sx) < 0) {
 388                 if (!ERRNO_IS_NOT_SUPPORTED(errno) && /* statx() is not supported by the kernel. */
 389                     !ERRNO_IS_PRIVILEGE(errno) &&     /* maybe filtered by seccomp. */
 390                     errno != EINVAL)                  /* glibc's fallback method returns EINVAL when AT_STATX_DONT_SYNC is set. */
 391                         return -errno;
 392
 393                 /* Fall back to name_to_handle_at() and then fdinfo if statx is not supported or we lack
 394                  * privileges */
 395
 396         } else if (FLAGS_SET(buf.nsx.stx_mask, STATX_MNT_ID)) {
 397                 *ret = buf.nsx.stx_mnt_id;
 398                 return 0;
 399         }
 400
 401         return path_get_mnt_id_at_fallback(dir_fd, path, ret);
 402 }
 403
 404 bool fstype_is_network(const char *fstype) {
 405         const char *x;
 406
 407         x = startswith(fstype, "fuse.");
 408         if (x)
 409                 fstype = x;
 410
 411         if (nulstr_contains(filesystem_sets[FILESYSTEM_SET_NETWORK].value, fstype))
 412                 return true;
 413
 414         /* Filesystems not present in the internal database */
 415         return STR_IN_SET(fstype,
 416                           "davfs",
 417                           "glusterfs",
 418                           "lustre",
 419                           "sshfs");
 420 }
 421
 422 bool fstype_needs_quota(const char *fstype) {
 423        /* 1. quotacheck needs to be run for some filesystems after they are mounted
 424         *    if the filesystem was not unmounted cleanly.
 425         * 2. You may need to run quotaon to enable quota usage tracking and/or
 426         *    enforcement.
 427         * ext2     - needs 1) and 2)
 428         * ext3     - needs 2) if configured using usrjquota/grpjquota mount options
 429         * ext4     - needs 1) if created without journal, needs 2) if created without QUOTA
 430         *            filesystem feature
 431         * reiserfs - needs 2).
 432         * jfs      - needs 2)
 433         * f2fs     - needs 2) if configured using usrjquota/grpjquota/prjjquota mount options
 434         * xfs      - nothing needed
 435         * gfs2     - nothing needed
 436         * ocfs2    - nothing needed
 437         * btrfs    - nothing needed
 438         * for reference see filesystem and quota manpages */
 439         return STR_IN_SET(fstype,
 440                           "ext2",
 441                           "ext3",
 442                           "ext4",
 443                           "reiserfs",
 444                           "jfs",
 445                           "f2fs");
 446 }
 447
 448 bool fstype_is_api_vfs(const char *fstype) {
 449         const FilesystemSet *fs;
 450
 451         FOREACH_POINTER(fs,
 452                 filesystem_sets + FILESYSTEM_SET_BASIC_API,
 453                 filesystem_sets + FILESYSTEM_SET_AUXILIARY_API,
 454                 filesystem_sets + FILESYSTEM_SET_PRIVILEGED_API,
 455                 filesystem_sets + FILESYSTEM_SET_TEMPORARY)
 456             if (nulstr_contains(fs->value, fstype))
 457                     return true;
 458
 459         /* Filesystems not present in the internal database */
 460         return STR_IN_SET(fstype,
 461                           "autofs",
 462                           "cpuset",
 463                           "devtmpfs");
 464 }
 465
 466 bool fstype_is_blockdev_backed(const char *fstype) {
 467         const char *x;
 468
 469         x = startswith(fstype, "fuse.");
 470         if (x)
 471                 fstype = x;
 472
 473         return !streq(fstype, "9p") && !fstype_is_network(fstype) && !fstype_is_api_vfs(fstype);
 474 }
 475
 476 bool fstype_is_ro(const char *fstype) {
 477         /* All Linux file systems that are necessarily read-only */
 478         return STR_IN_SET(fstype,
 479                           "DM_verity_hash",
 480                           "cramfs",
 481                           "erofs",
 482                           "iso9660",
 483                           "squashfs");
 484 }
 485
 486 bool fstype_can_discard(const char *fstype) {
 487         assert(fstype);
 488
 489         /* Use a curated list as first check, to avoid calling fsopen() which might load kmods, which might
 490          * not be allowed in our MAC context. */
 491         if (STR_IN_SET(fstype, "btrfs", "f2fs", "ext4", "vfat", "xfs"))
 492                 return true;
 493
 494         /* On new kernels we can just ask the kernel */
 495         return mount_option_supported(fstype, "discard", NULL) > 0;
 496 }
 497
 498 bool fstype_can_norecovery(const char *fstype) {
 499         assert(fstype);
 500
 501         /* Use a curated list as first check, to avoid calling fsopen() which might load kmods, which might
 502          * not be allowed in our MAC context. */
 503         if (STR_IN_SET(fstype, "ext3", "ext4", "xfs", "btrfs"))
 504                 return true;
 505
 506         /* On new kernels we can just ask the kernel */
 507         return mount_option_supported(fstype, "norecovery", NULL) > 0;
 508 }
 509
 510 bool fstype_can_umask(const char *fstype) {
 511         assert(fstype);
 512
 513         /* Use a curated list as first check, to avoid calling fsopen() which might load kmods, which might
 514          * not be allowed in our MAC context. If we don't know ourselves, on new kernels we can just ask the
 515          * kernel. */
 516         return streq(fstype, "vfat") || mount_option_supported(fstype, "umask", "0077") > 0;
 517 }
 518
 519 bool fstype_can_uid_gid(const char *fstype) {
 520         /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and
 521          * directories, current and future. Note that this does *not* ask the kernel via
 522          * mount_option_supported() here because the uid=/gid= setting of various file systems mean different
 523          * things: some apply it only to the root dir inode, others to all inodes in the file system. Thus we
 524          * maintain the curated list below. 😢 */
 525
 526         return STR_IN_SET(fstype,
 527                           "adfs",
 528                           "exfat",
 529                           "fat",
 530                           "hfs",
 531                           "hpfs",
 532                           "iso9660",
 533                           "msdos",
 534                           "ntfs",
 535                           "vfat");
 536 }
 537
 538 int dev_is_devtmpfs(void) {
 539         _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 540         int mount_id, r;
 541         char *e;
 542
 543         r = path_get_mnt_id("/dev", &mount_id);
 544         if (r < 0)
 545                 return r;
 546
 547         r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
 548         if (r == -ENOENT)
 549                 return proc_mounted() > 0 ? -ENOENT : -ENOSYS;
 550         if (r < 0)
 551                 return r;
 552
 553         for (;;) {
 554                 _cleanup_free_ char *line = NULL;
 555                 int mid;
 556
 557                 r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
 558                 if (r < 0)
 559                         return r;
 560                 if (r == 0)
 561                         break;
 562
 563                 if (sscanf(line, "%i", &mid) != 1)
 564                         continue;
 565
 566                 if (mid != mount_id)
 567                         continue;
 568
 569                 e = strstrafter(line, " - ");
 570                 if (!e)
 571                         continue;
 572
 573                 /* accept any name that starts with the currently expected type */
 574                 if (startswith(e, "devtmpfs"))
 575                         return true;
 576         }
 577
 578         return false;
 579 }
 580
 581 int mount_fd(const char *source,
 582              int target_fd,
 583              const char *filesystemtype,
 584              unsigned long mountflags,
 585              const void *data) {
 586
 587         if (mount(source, FORMAT_PROC_FD_PATH(target_fd), filesystemtype, mountflags, data) < 0) {
 588                 if (errno != ENOENT)
 589                         return -errno;
 590
 591                 /* ENOENT can mean two things: either that the source is missing, or that /proc/ isn't
 592                  * mounted. Check for the latter to generate better error messages. */
 593                 if (proc_mounted() == 0)
 594                         return -ENOSYS;
 595
 596                 return -ENOENT;
 597         }
 598
 599         return 0;
 600 }
 601
 602 int mount_nofollow(
 603                 const char *source,
 604                 const char *target,
 605                 const char *filesystemtype,
 606                 unsigned long mountflags,
 607                 const void *data) {
 608
 609         _cleanup_close_ int fd = -EBADF;
 610
 611         /* In almost all cases we want to manipulate the mount table without following symlinks, hence
 612          * mount_nofollow() is usually the way to go. The only exceptions are environments where /proc/ is
 613          * not available yet, since we need /proc/self/fd/ for this logic to work. i.e. during the early
 614          * initialization of namespacing/container stuff where /proc is not yet mounted (and maybe even the
 615          * fs to mount) we can only use traditional mount() directly.
 616          *
 617          * Note that this disables following only for the final component of the target, i.e symlinks within
 618          * the path of the target are honoured, as are symlinks in the source path everywhere. */
 619
 620         fd = open(target, O_PATH|O_CLOEXEC|O_NOFOLLOW);
 621         if (fd < 0)
 622                 return -errno;
 623
 624         return mount_fd(source, fd, filesystemtype, mountflags, data);
 625 }
 626
 627 const char *mount_propagation_flag_to_string(unsigned long flags) {
 628
 629         switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) {
 630         case 0:
 631                 return "";
 632         case MS_SHARED:
 633                 return "shared";
 634         case MS_SLAVE:
 635                 return "slave";
 636         case MS_PRIVATE:
 637                 return "private";
 638         }
 639
 640         return NULL;
 641 }
 642
 643 int mount_propagation_flag_from_string(const char *name, unsigned long *ret) {
 644
 645         if (isempty(name))
 646                 *ret = 0;
 647         else if (streq(name, "shared"))
 648                 *ret = MS_SHARED;
 649         else if (streq(name, "slave"))
 650                 *ret = MS_SLAVE;
 651         else if (streq(name, "private"))
 652                 *ret = MS_PRIVATE;
 653         else
 654                 return -EINVAL;
 655         return 0;
 656 }
 657
 658 bool mount_propagation_flag_is_valid(unsigned long flag) {
 659         return IN_SET(flag, 0, MS_SHARED, MS_PRIVATE, MS_SLAVE);
 660 }
 661
 662 bool mount_new_api_supported(void) {
 663         static int cache = -1;
 664         int r;
 665
 666         if (cache >= 0)
 667                 return cache;
 668
 669         /* This is the newer API among the ones we use, so use it as boundary */
 670         r = RET_NERRNO(mount_setattr(-EBADF, NULL, 0, NULL, 0));
 671         if (r == 0 || ERRNO_IS_NOT_SUPPORTED(r)) /* This should return an error if it is working properly */
 672                 return (cache = false);
 673
 674         return (cache = true);
 675 }
 676
 677 unsigned long ms_nosymfollow_supported(void) {
 678         _cleanup_close_ int fsfd = -EBADF, mntfd = -EBADF;
 679         static int cache = -1;
 680
 681         /* Returns MS_NOSYMFOLLOW if it is supported, zero otherwise. */
 682
 683         if (cache >= 0)
 684                 return cache ? MS_NOSYMFOLLOW : 0;
 685
 686         if (!mount_new_api_supported())
 687                 goto not_supported;
 688
 689         /* Checks if MS_NOSYMFOLLOW is supported (which was added in 5.10). We use the new mount API's
 690          * mount_setattr() call for that, which was added in 5.12, which is close enough. */
 691
 692         fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
 693         if (fsfd < 0) {
 694                 if (ERRNO_IS_NOT_SUPPORTED(errno))
 695                         goto not_supported;
 696
 697                 log_debug_errno(errno, "Failed to open superblock context for tmpfs: %m");
 698                 return 0;
 699         }
 700
 701         if (fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
 702                 if (ERRNO_IS_NOT_SUPPORTED(errno))
 703                         goto not_supported;
 704
 705                 log_debug_errno(errno, "Failed to create tmpfs superblock: %m");
 706                 return 0;
 707         }
 708
 709         mntfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
 710         if (mntfd < 0) {
 711                 if (ERRNO_IS_NOT_SUPPORTED(errno))
 712                         goto not_supported;
 713
 714                 log_debug_errno(errno, "Failed to turn superblock fd into mount fd: %m");
 715                 return 0;
 716         }
 717
 718         if (mount_setattr(mntfd, "", AT_EMPTY_PATH|AT_RECURSIVE,
 719                           &(struct mount_attr) {
 720                                   .attr_set = MOUNT_ATTR_NOSYMFOLLOW,
 721                           }, sizeof(struct mount_attr)) < 0) {
 722                 if (ERRNO_IS_NOT_SUPPORTED(errno))
 723                         goto not_supported;
 724
 725                 log_debug_errno(errno, "Failed to set MOUNT_ATTR_NOSYMFOLLOW mount attribute: %m");
 726                 return 0;
 727         }
 728
 729         cache = true;
 730         return MS_NOSYMFOLLOW;
 731
 732 not_supported:
 733         cache = false;
 734         return 0;
 735 }
 736
 737 int mount_option_supported(const char *fstype, const char *key, const char *value) {
 738         _cleanup_close_ int fd = -EBADF;
 739         int r;
 740
 741         /* Checks if the specified file system supports a mount option. Returns > 0 if it supports it, == 0 if
 742          * it does not. Return -EAGAIN if we can't determine it. And any other error otherwise. */
 743
 744         assert(fstype);
 745         assert(key);
 746
 747         fd = fsopen(fstype, FSOPEN_CLOEXEC);
 748         if (fd < 0) {
 749                 if (ERRNO_IS_NOT_SUPPORTED(errno))
 750                         return -EAGAIN;  /* new mount API not available → don't know */
 751
 752                 return log_debug_errno(errno, "Failed to open superblock context for '%s': %m", fstype);
 753         }
 754
 755         /* Various file systems have not been converted to the new mount API yet. For such file systems
 756          * fsconfig() with FSCONFIG_SET_STRING/FSCONFIG_SET_FLAG never fail. Which sucks, because we want to
 757          * use it for testing support, after all. Let's hence do a check if the file system got converted yet
 758          * first. */
 759         if (fsconfig(fd, FSCONFIG_SET_FD, "adefinitelynotexistingmountoption", NULL, fd) < 0) {
 760                 /* If FSCONFIG_SET_FD is not supported for the fs, then the file system was not converted to
 761                  * the new mount API yet. If it returns EINVAL the mount option doesn't exist, but the fstype
 762                  * is converted. */
 763                 if (errno == EOPNOTSUPP)
 764                         return -EAGAIN; /* FSCONFIG_SET_FD not supported on the fs, hence not converted to new mount API → don't know */
 765                 if (errno != EINVAL)
 766                         return log_debug_errno(errno, "Failed to check if file system has been converted to new mount API: %m");
 767
 768                 /* So FSCONFIG_SET_FD worked, but the option didn't exist (we got EINVAL), this means the fs
 769                  * is converted. Let's now ask the actual question we wonder about. */
 770         } else
 771                 return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN), "FSCONFIG_SET_FD worked unexpectedly for '%s', whoa!", fstype);
 772
 773         if (value)
 774                 r = fsconfig(fd, FSCONFIG_SET_STRING, key, value, 0);
 775         else
 776                 r = fsconfig(fd, FSCONFIG_SET_FLAG, key, NULL, 0);
 777         if (r < 0) {
 778                 if (errno == EINVAL)
 779                         return false; /* EINVAL means option not supported. */
 780
 781                 return log_debug_errno(errno, "Failed to set '%s%s%s' on '%s' superblock context: %m",
 782                                        key, value ? "=" : "", strempty(value), fstype);
 783         }
 784
 785         return true; /* works! */
 786 }