src/basic/mountpoint-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #include <errno.h>
   4 #include <fcntl.h>
   5 #include <sys/mount.h>
   6
   7 #include "alloc-util.h"
   8 #include "fd-util.h"
   9 #include "fileio.h"
  10 #include "fs-util.h"
  11 #include "missing.h"
  12 #include "mountpoint-util.h"
  13 #include "parse-util.h"
  14 #include "path-util.h"
  15 #include "stdio-util.h"
  16 #include "strv.h"
  17
  18 /* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
  19  * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
  20  * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
  21  * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
  22  * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
  23  * with large file handles anyway. */
  24 #define ORIGINAL_MAX_HANDLE_SZ 128
  25
  26 int name_to_handle_at_loop(
  27                 int fd,
  28                 const char *path,
  29                 struct file_handle **ret_handle,
  30                 int *ret_mnt_id,
  31                 int flags) {
  32
  33         _cleanup_free_ struct file_handle *h = NULL;
  34         size_t n = ORIGINAL_MAX_HANDLE_SZ;
  35
  36         /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
  37          * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
  38          * start value, it is not an upper bound on the buffer size required.
  39          *
  40          * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
  41          * as NULL if there's no interest in either. */
  42
  43         for (;;) {
  44                 int mnt_id = -1;
  45
  46                 h = malloc0(offsetof(struct file_handle, f_handle) + n);
  47                 if (!h)
  48                         return -ENOMEM;
  49
  50                 h->handle_bytes = n;
  51
  52                 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
  53
  54                         if (ret_handle)
  55                                 *ret_handle = TAKE_PTR(h);
  56
  57                         if (ret_mnt_id)
  58                                 *ret_mnt_id = mnt_id;
  59
  60                         return 0;
  61                 }
  62                 if (errno != EOVERFLOW)
  63                         return -errno;
  64
  65                 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
  66
  67                         /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
  68                          * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
  69                          * be filled in, and the caller was interested in only the mount ID an nothing else. */
  70
  71                         *ret_mnt_id = mnt_id;
  72                         return 0;
  73                 }
  74
  75                 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
  76                  * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
  77                  * buffer. In that case propagate EOVERFLOW */
  78                 if (h->handle_bytes <= n)
  79                         return -EOVERFLOW;
  80
  81                 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
  82                 n = h->handle_bytes;
  83                 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
  84                         return -EOVERFLOW;
  85
  86                 h = mfree(h);
  87         }
  88 }
  89
  90 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
  91         char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
  92         _cleanup_free_ char *fdinfo = NULL;
  93         _cleanup_close_ int subfd = -1;
  94         char *p;
  95         int r;
  96
  97         if ((flags & AT_EMPTY_PATH) && isempty(filename))
  98                 xsprintf(path, "/proc/self/fdinfo/%i", fd);
  99         else {
 100                 subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
 101                 if (subfd < 0)
 102                         return -errno;
 103
 104                 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
 105         }
 106
 107         r = read_full_file(path, &fdinfo, NULL);
 108         if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
 109                 return -EOPNOTSUPP;
 110         if (r < 0)
 111                 return r;
 112
 113         p = startswith(fdinfo, "mnt_id:");
 114         if (!p) {
 115                 p = strstr(fdinfo, "\nmnt_id:");
 116                 if (!p) /* The mnt_id field is a relatively new addition */
 117                         return -EOPNOTSUPP;
 118
 119                 p += 8;
 120         }
 121
 122         p += strspn(p, WHITESPACE);
 123         p[strcspn(p, WHITESPACE)] = 0;
 124
 125         return safe_atoi(p, mnt_id);
 126 }
 127
 128 int fd_is_mount_point(int fd, const char *filename, int flags) {
 129         _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
 130         int mount_id = -1, mount_id_parent = -1;
 131         bool nosupp = false, check_st_dev = true;
 132         struct stat a, b;
 133         int r;
 134
 135         assert(fd >= 0);
 136         assert(filename);
 137
 138         /* First we will try the name_to_handle_at() syscall, which
 139          * tells us the mount id and an opaque file "handle". It is
 140          * not supported everywhere though (kernel compile-time
 141          * option, not all file systems are hooked up). If it works
 142          * the mount id is usually good enough to tell us whether
 143          * something is a mount point.
 144          *
 145          * If that didn't work we will try to read the mount id from
 146          * /proc/self/fdinfo/<fd>. This is almost as good as
 147          * name_to_handle_at(), however, does not return the
 148          * opaque file handle. The opaque file handle is pretty useful
 149          * to detect the root directory, which we should always
 150          * consider a mount point. Hence we use this only as
 151          * fallback. Exporting the mnt_id in fdinfo is a pretty recent
 152          * kernel addition.
 153          *
 154          * As last fallback we do traditional fstat() based st_dev
 155          * comparisons. This is how things were traditionally done,
 156          * but unionfs breaks this since it exposes file
 157          * systems with a variety of st_dev reported. Also, btrfs
 158          * subvolumes have different st_dev, even though they aren't
 159          * real mounts of their own. */
 160
 161         r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
 162         if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
 163                 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
 164                  * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
 165                  * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
 166                  * (EINVAL): fall back to simpler logic. */
 167                 goto fallback_fdinfo;
 168         else if (r == -EOPNOTSUPP)
 169                 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
 170                  * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
 171                  * logic */
 172                 nosupp = true;
 173         else if (r < 0)
 174                 return r;
 175
 176         r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
 177         if (r == -EOPNOTSUPP) {
 178                 if (nosupp)
 179                         /* Neither parent nor child do name_to_handle_at()?  We have no choice but to fall back. */
 180                         goto fallback_fdinfo;
 181                 else
 182                         /* The parent can't do name_to_handle_at() but the directory we are interested in can?  If so,
 183                          * it must be a mount point. */
 184                         return 1;
 185         } else if (r < 0)
 186                 return r;
 187
 188         /* The parent can do name_to_handle_at() but the
 189          * directory we are interested in can't? If so, it
 190          * must be a mount point. */
 191         if (nosupp)
 192                 return 1;
 193
 194         /* If the file handle for the directory we are
 195          * interested in and its parent are identical, we
 196          * assume this is the root directory, which is a mount
 197          * point. */
 198
 199         if (h->handle_bytes == h_parent->handle_bytes &&
 200             h->handle_type == h_parent->handle_type &&
 201             memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
 202                 return 1;
 203
 204         return mount_id != mount_id_parent;
 205
 206 fallback_fdinfo:
 207         r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
 208         if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
 209                 goto fallback_fstat;
 210         if (r < 0)
 211                 return r;
 212
 213         r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
 214         if (r < 0)
 215                 return r;
 216
 217         if (mount_id != mount_id_parent)
 218                 return 1;
 219
 220         /* Hmm, so, the mount ids are the same. This leaves one
 221          * special case though for the root file system. For that,
 222          * let's see if the parent directory has the same inode as we
 223          * are interested in. Hence, let's also do fstat() checks now,
 224          * too, but avoid the st_dev comparisons, since they aren't
 225          * that useful on unionfs mounts. */
 226         check_st_dev = false;
 227
 228 fallback_fstat:
 229         /* yay for fstatat() taking a different set of flags than the other
 230          * _at() above */
 231         if (flags & AT_SYMLINK_FOLLOW)
 232                 flags &= ~AT_SYMLINK_FOLLOW;
 233         else
 234                 flags |= AT_SYMLINK_NOFOLLOW;
 235         if (fstatat(fd, filename, &a, flags) < 0)
 236                 return -errno;
 237
 238         if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
 239                 return -errno;
 240
 241         /* A directory with same device and inode as its parent? Must
 242          * be the root directory */
 243         if (a.st_dev == b.st_dev &&
 244             a.st_ino == b.st_ino)
 245                 return 1;
 246
 247         return check_st_dev && (a.st_dev != b.st_dev);
 248 }
 249
 250 /* flags can be AT_SYMLINK_FOLLOW or 0 */
 251 int path_is_mount_point(const char *t, const char *root, int flags) {
 252         _cleanup_free_ char *canonical = NULL;
 253         _cleanup_close_ int fd = -1;
 254         int r;
 255
 256         assert(t);
 257         assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
 258
 259         if (path_equal(t, "/"))
 260                 return 1;
 261
 262         /* we need to resolve symlinks manually, we can't just rely on
 263          * fd_is_mount_point() to do that for us; if we have a structure like
 264          * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
 265          * look at needs to be /usr, not /. */
 266         if (flags & AT_SYMLINK_FOLLOW) {
 267                 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
 268                 if (r < 0)
 269                         return r;
 270
 271                 t = canonical;
 272         }
 273
 274         fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
 275         if (fd < 0)
 276                 return -errno;
 277
 278         return fd_is_mount_point(fd, last_path_component(t), flags);
 279 }
 280
 281 int path_get_mnt_id(const char *path, int *ret) {
 282         int r;
 283
 284         r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
 285         if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
 286                 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
 287
 288         return r;
 289 }
 290
 291 bool fstype_is_network(const char *fstype) {
 292         const char *x;
 293
 294         x = startswith(fstype, "fuse.");
 295         if (x)
 296                 fstype = x;
 297
 298         return STR_IN_SET(fstype,
 299                           "afs",
 300                           "cifs",
 301                           "smbfs",
 302                           "sshfs",
 303                           "ncpfs",
 304                           "ncp",
 305                           "nfs",
 306                           "nfs4",
 307                           "gfs",
 308                           "gfs2",
 309                           "glusterfs",
 310                           "pvfs2", /* OrangeFS */
 311                           "ocfs2",
 312                           "lustre");
 313 }
 314
 315 bool fstype_is_api_vfs(const char *fstype) {
 316         return STR_IN_SET(fstype,
 317                           "autofs",
 318                           "bpf",
 319                           "cgroup",
 320                           "cgroup2",
 321                           "configfs",
 322                           "cpuset",
 323                           "debugfs",
 324                           "devpts",
 325                           "devtmpfs",
 326                           "efivarfs",
 327                           "fusectl",
 328                           "hugetlbfs",
 329                           "mqueue",
 330                           "proc",
 331                           "pstore",
 332                           "ramfs",
 333                           "securityfs",
 334                           "sysfs",
 335                           "tmpfs",
 336                           "tracefs");
 337 }
 338
 339 bool fstype_is_ro(const char *fstype) {
 340         /* All Linux file systems that are necessarily read-only */
 341         return STR_IN_SET(fstype,
 342                           "DM_verity_hash",
 343                           "iso9660",
 344                           "squashfs");
 345 }
 346
 347 bool fstype_can_discard(const char *fstype) {
 348         return STR_IN_SET(fstype,
 349                           "btrfs",
 350                           "ext4",
 351                           "vfat",
 352                           "xfs");
 353 }
 354
 355 bool fstype_can_uid_gid(const char *fstype) {
 356
 357         /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
 358          * current and future. */
 359
 360         return STR_IN_SET(fstype,
 361                           "adfs",
 362                           "fat",
 363                           "hfs",
 364                           "hpfs",
 365                           "iso9660",
 366                           "msdos",
 367                           "ntfs",
 368                           "vfat");
 369 }
 370
 371 int dev_is_devtmpfs(void) {
 372         _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 373         int mount_id, r;
 374         char *e;
 375
 376         r = path_get_mnt_id("/dev", &mount_id);
 377         if (r < 0)
 378                 return r;
 379
 380         r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
 381         if (r < 0)
 382                 return r;
 383
 384         for (;;) {
 385                 _cleanup_free_ char *line = NULL;
 386                 int mid;
 387
 388                 r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
 389                 if (r < 0)
 390                         return r;
 391                 if (r == 0)
 392                         break;
 393
 394                 if (sscanf(line, "%i", &mid) != 1)
 395                         continue;
 396
 397                 if (mid != mount_id)
 398                         continue;
 399
 400                 e = strstr(line, " - ");
 401                 if (!e)
 402                         continue;
 403
 404                 /* accept any name that starts with the currently expected type */
 405                 if (startswith(e + 3, "devtmpfs"))
 406                         return true;
 407         }
 408
 409         return false;
 410 }
 411
 412 const char *mount_propagation_flags_to_string(unsigned long flags) {
 413
 414         switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) {
 415         case 0:
 416                 return "";
 417         case MS_SHARED:
 418                 return "shared";
 419         case MS_SLAVE:
 420                 return "slave";
 421         case MS_PRIVATE:
 422                 return "private";
 423         }
 424
 425         return NULL;
 426 }
 427
 428 int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
 429
 430         if (isempty(name))
 431                 *ret = 0;
 432         else if (streq(name, "shared"))
 433                 *ret = MS_SHARED;
 434         else if (streq(name, "slave"))
 435                 *ret = MS_SLAVE;
 436         else if (streq(name, "private"))
 437                 *ret = MS_PRIVATE;
 438         else
 439                 return -EINVAL;
 440         return 0;
 441 }