src/basic/mountpoint-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #include <errno.h>
   4 #include <fcntl.h>
   5 #include <stdio_ext.h>
   6 #include <sys/mount.h>
   7
   8 #include "alloc-util.h"
   9 #include "fd-util.h"
  10 #include "fileio.h"
  11 #include "fs-util.h"
  12 #include "missing.h"
  13 #include "mountpoint-util.h"
  14 #include "parse-util.h"
  15 #include "path-util.h"
  16 #include "stdio-util.h"
  17 #include "strv.h"
  18
/* This is the original MAX_HANDLE_SZ definition from the kernel, as of when the API was introduced. We use it in place
 * of whatever value the current headers define in order to future-proof things: if the size were increased in the API
 * headers and our code recompiled, it would cease working on old kernels, as those refuse any size larger than this
 * value with EINVAL right away. Hence, let's disconnect ourselves from any such API changes and stick to the original
 * definition from when the API was introduced. We use it as a start value only (see below), and hence are still able
 * to deal with larger file handles. */
#define ORIGINAL_MAX_HANDLE_SZ 128
  26
  27 int name_to_handle_at_loop(
  28                 int fd,
  29                 const char *path,
  30                 struct file_handle **ret_handle,
  31                 int *ret_mnt_id,
  32                 int flags) {
  33
  34         _cleanup_free_ struct file_handle *h = NULL;
  35         size_t n = ORIGINAL_MAX_HANDLE_SZ;
  36
  37         /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
  38          * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
  39          * start value, it is not an upper bound on the buffer size required.
  40          *
  41          * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
  42          * as NULL if there's no interest in either. */
  43
  44         for (;;) {
  45                 int mnt_id = -1;
  46
  47                 h = malloc0(offsetof(struct file_handle, f_handle) + n);
  48                 if (!h)
  49                         return -ENOMEM;
  50
  51                 h->handle_bytes = n;
  52
  53                 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
  54
  55                         if (ret_handle)
  56                                 *ret_handle = TAKE_PTR(h);
  57
  58                         if (ret_mnt_id)
  59                                 *ret_mnt_id = mnt_id;
  60
  61                         return 0;
  62                 }
  63                 if (errno != EOVERFLOW)
  64                         return -errno;
  65
  66                 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
  67
  68                         /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
  69                          * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
  70                          * be filled in, and the caller was interested in only the mount ID an nothing else. */
  71
  72                         *ret_mnt_id = mnt_id;
  73                         return 0;
  74                 }
  75
  76                 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
  77                  * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
  78                  * buffer. In that case propagate EOVERFLOW */
  79                 if (h->handle_bytes <= n)
  80                         return -EOVERFLOW;
  81
  82                 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
  83                 n = h->handle_bytes;
  84                 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
  85                         return -EOVERFLOW;
  86
  87                 h = mfree(h);
  88         }
  89 }
  90
  91 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
  92         char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
  93         _cleanup_free_ char *fdinfo = NULL;
  94         _cleanup_close_ int subfd = -1;
  95         char *p;
  96         int r;
  97
  98         if ((flags & AT_EMPTY_PATH) && isempty(filename))
  99                 xsprintf(path, "/proc/self/fdinfo/%i", fd);
 100         else {
 101                 subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
 102                 if (subfd < 0)
 103                         return -errno;
 104
 105                 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
 106         }
 107
 108         r = read_full_file(path, &fdinfo, NULL);
 109         if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
 110                 return -EOPNOTSUPP;
 111         if (r < 0)
 112                 return r;
 113
 114         p = startswith(fdinfo, "mnt_id:");
 115         if (!p) {
 116                 p = strstr(fdinfo, "\nmnt_id:");
 117                 if (!p) /* The mnt_id field is a relatively new addition */
 118                         return -EOPNOTSUPP;
 119
 120                 p += 8;
 121         }
 122
 123         p += strspn(p, WHITESPACE);
 124         p[strcspn(p, WHITESPACE)] = 0;
 125
 126         return safe_atoi(p, mnt_id);
 127 }
 128
 129 int fd_is_mount_point(int fd, const char *filename, int flags) {
 130         _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
 131         int mount_id = -1, mount_id_parent = -1;
 132         bool nosupp = false, check_st_dev = true;
 133         struct stat a, b;
 134         int r;
 135
 136         assert(fd >= 0);
 137         assert(filename);
 138
 139         /* First we will try the name_to_handle_at() syscall, which
 140          * tells us the mount id and an opaque file "handle". It is
 141          * not supported everywhere though (kernel compile-time
 142          * option, not all file systems are hooked up). If it works
 143          * the mount id is usually good enough to tell us whether
 144          * something is a mount point.
 145          *
 146          * If that didn't work we will try to read the mount id from
 147          * /proc/self/fdinfo/<fd>. This is almost as good as
 148          * name_to_handle_at(), however, does not return the
 149          * opaque file handle. The opaque file handle is pretty useful
 150          * to detect the root directory, which we should always
 151          * consider a mount point. Hence we use this only as
 152          * fallback. Exporting the mnt_id in fdinfo is a pretty recent
 153          * kernel addition.
 154          *
 155          * As last fallback we do traditional fstat() based st_dev
 156          * comparisons. This is how things were traditionally done,
 157          * but unionfs breaks this since it exposes file
 158          * systems with a variety of st_dev reported. Also, btrfs
 159          * subvolumes have different st_dev, even though they aren't
 160          * real mounts of their own. */
 161
 162         r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
 163         if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
 164                 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
 165                  * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
 166                  * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
 167                  * (EINVAL): fall back to simpler logic. */
 168                 goto fallback_fdinfo;
 169         else if (r == -EOPNOTSUPP)
 170                 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
 171                  * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
 172                  * logic */
 173                 nosupp = true;
 174         else if (r < 0)
 175                 return r;
 176
 177         r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
 178         if (r == -EOPNOTSUPP) {
 179                 if (nosupp)
 180                         /* Neither parent nor child do name_to_handle_at()?  We have no choice but to fall back. */
 181                         goto fallback_fdinfo;
 182                 else
 183                         /* The parent can't do name_to_handle_at() but the directory we are interested in can?  If so,
 184                          * it must be a mount point. */
 185                         return 1;
 186         } else if (r < 0)
 187                 return r;
 188
 189         /* The parent can do name_to_handle_at() but the
 190          * directory we are interested in can't? If so, it
 191          * must be a mount point. */
 192         if (nosupp)
 193                 return 1;
 194
 195         /* If the file handle for the directory we are
 196          * interested in and its parent are identical, we
 197          * assume this is the root directory, which is a mount
 198          * point. */
 199
 200         if (h->handle_bytes == h_parent->handle_bytes &&
 201             h->handle_type == h_parent->handle_type &&
 202             memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
 203                 return 1;
 204
 205         return mount_id != mount_id_parent;
 206
 207 fallback_fdinfo:
 208         r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
 209         if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
 210                 goto fallback_fstat;
 211         if (r < 0)
 212                 return r;
 213
 214         r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
 215         if (r < 0)
 216                 return r;
 217
 218         if (mount_id != mount_id_parent)
 219                 return 1;
 220
 221         /* Hmm, so, the mount ids are the same. This leaves one
 222          * special case though for the root file system. For that,
 223          * let's see if the parent directory has the same inode as we
 224          * are interested in. Hence, let's also do fstat() checks now,
 225          * too, but avoid the st_dev comparisons, since they aren't
 226          * that useful on unionfs mounts. */
 227         check_st_dev = false;
 228
 229 fallback_fstat:
 230         /* yay for fstatat() taking a different set of flags than the other
 231          * _at() above */
 232         if (flags & AT_SYMLINK_FOLLOW)
 233                 flags &= ~AT_SYMLINK_FOLLOW;
 234         else
 235                 flags |= AT_SYMLINK_NOFOLLOW;
 236         if (fstatat(fd, filename, &a, flags) < 0)
 237                 return -errno;
 238
 239         if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
 240                 return -errno;
 241
 242         /* A directory with same device and inode as its parent? Must
 243          * be the root directory */
 244         if (a.st_dev == b.st_dev &&
 245             a.st_ino == b.st_ino)
 246                 return 1;
 247
 248         return check_st_dev && (a.st_dev != b.st_dev);
 249 }
 250
 251 /* flags can be AT_SYMLINK_FOLLOW or 0 */
 252 int path_is_mount_point(const char *t, const char *root, int flags) {
 253         _cleanup_free_ char *canonical = NULL;
 254         _cleanup_close_ int fd = -1;
 255         int r;
 256
 257         assert(t);
 258         assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
 259
 260         if (path_equal(t, "/"))
 261                 return 1;
 262
 263         /* we need to resolve symlinks manually, we can't just rely on
 264          * fd_is_mount_point() to do that for us; if we have a structure like
 265          * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
 266          * look at needs to be /usr, not /. */
 267         if (flags & AT_SYMLINK_FOLLOW) {
 268                 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
 269                 if (r < 0)
 270                         return r;
 271
 272                 t = canonical;
 273         }
 274
 275         fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
 276         if (fd < 0)
 277                 return -errno;
 278
 279         return fd_is_mount_point(fd, last_path_component(t), flags);
 280 }
 281
 282 int path_get_mnt_id(const char *path, int *ret) {
 283         int r;
 284
 285         r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
 286         if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
 287                 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
 288
 289         return r;
 290 }
 291
 292 bool fstype_is_network(const char *fstype) {
 293         const char *x;
 294
 295         x = startswith(fstype, "fuse.");
 296         if (x)
 297                 fstype = x;
 298
 299         return STR_IN_SET(fstype,
 300                           "afs",
 301                           "cifs",
 302                           "smbfs",
 303                           "sshfs",
 304                           "ncpfs",
 305                           "ncp",
 306                           "nfs",
 307                           "nfs4",
 308                           "gfs",
 309                           "gfs2",
 310                           "glusterfs",
 311                           "pvfs2", /* OrangeFS */
 312                           "ocfs2",
 313                           "lustre");
 314 }
 315
 316 bool fstype_is_api_vfs(const char *fstype) {
 317         return STR_IN_SET(fstype,
 318                           "autofs",
 319                           "bpf",
 320                           "cgroup",
 321                           "cgroup2",
 322                           "configfs",
 323                           "cpuset",
 324                           "debugfs",
 325                           "devpts",
 326                           "devtmpfs",
 327                           "efivarfs",
 328                           "fusectl",
 329                           "hugetlbfs",
 330                           "mqueue",
 331                           "proc",
 332                           "pstore",
 333                           "ramfs",
 334                           "securityfs",
 335                           "sysfs",
 336                           "tmpfs",
 337                           "tracefs");
 338 }
 339
 340 bool fstype_is_ro(const char *fstype) {
 341         /* All Linux file systems that are necessarily read-only */
 342         return STR_IN_SET(fstype,
 343                           "DM_verity_hash",
 344                           "iso9660",
 345                           "squashfs");
 346 }
 347
 348 bool fstype_can_discard(const char *fstype) {
 349         return STR_IN_SET(fstype,
 350                           "btrfs",
 351                           "ext4",
 352                           "vfat",
 353                           "xfs");
 354 }
 355
 356 bool fstype_can_uid_gid(const char *fstype) {
 357
 358         /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
 359          * current and future. */
 360
 361         return STR_IN_SET(fstype,
 362                           "adfs",
 363                           "fat",
 364                           "hfs",
 365                           "hpfs",
 366                           "iso9660",
 367                           "msdos",
 368                           "ntfs",
 369                           "vfat");
 370 }
 371
 372 int dev_is_devtmpfs(void) {
 373         _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 374         int mount_id, r;
 375         char *e;
 376
 377         r = path_get_mnt_id("/dev", &mount_id);
 378         if (r < 0)
 379                 return r;
 380
 381         proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 382         if (!proc_self_mountinfo)
 383                 return -errno;
 384
 385         (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
 386
 387         for (;;) {
 388                 _cleanup_free_ char *line = NULL;
 389                 int mid;
 390
 391                 r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
 392                 if (r < 0)
 393                         return r;
 394                 if (r == 0)
 395                         break;
 396
 397                 if (sscanf(line, "%i", &mid) != 1)
 398                         continue;
 399
 400                 if (mid != mount_id)
 401                         continue;
 402
 403                 e = strstr(line, " - ");
 404                 if (!e)
 405                         continue;
 406
 407                 /* accept any name that starts with the currently expected type */
 408                 if (startswith(e + 3, "devtmpfs"))
 409                         return true;
 410         }
 411
 412         return false;
 413 }
 414
 415 const char *mount_propagation_flags_to_string(unsigned long flags) {
 416
 417         switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) {
 418         case 0:
 419                 return "";
 420         case MS_SHARED:
 421                 return "shared";
 422         case MS_SLAVE:
 423                 return "slave";
 424         case MS_PRIVATE:
 425                 return "private";
 426         }
 427
 428         return NULL;
 429 }
 430
 431 int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
 432
 433         if (isempty(name))
 434                 *ret = 0;
 435         else if (streq(name, "shared"))
 436                 *ret = MS_SHARED;
 437         else if (streq(name, "slave"))
 438                 *ret = MS_SLAVE;
 439         else if (streq(name, "private"))
 440                 *ret = MS_PRIVATE;
 441         else
 442                 return -EINVAL;
 443         return 0;
 444 }