src/basic/mount-util.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2010 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <errno.h>
  21 #include <stdlib.h>
  22 #include <string.h>
  23 #include <sys/mount.h>
  24 #include <sys/stat.h>
  25 #include <sys/statvfs.h>
  26 #include <unistd.h>
  27
  28 #include "alloc-util.h"
  29 #include "escape.h"
  30 #include "fd-util.h"
  31 #include "fileio.h"
  32 #include "hashmap.h"
  33 #include "mount-util.h"
  34 #include "parse-util.h"
  35 #include "path-util.h"
  36 #include "set.h"
  37 #include "stdio-util.h"
  38 #include "string-util.h"
  39
  40 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
  41         char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
  42         _cleanup_free_ char *fdinfo = NULL;
  43         _cleanup_close_ int subfd = -1;
  44         char *p;
  45         int r;
  46
  47         if ((flags & AT_EMPTY_PATH) && isempty(filename))
  48                 xsprintf(path, "/proc/self/fdinfo/%i", fd);
  49         else {
  50                 subfd = openat(fd, filename, O_CLOEXEC|O_PATH);
  51                 if (subfd < 0)
  52                         return -errno;
  53
  54                 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
  55         }
  56
  57         r = read_full_file(path, &fdinfo, NULL);
  58         if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
  59                 return -EOPNOTSUPP;
  60         if (r < 0)
  61                 return -errno;
  62
  63         p = startswith(fdinfo, "mnt_id:");
  64         if (!p) {
  65                 p = strstr(fdinfo, "\nmnt_id:");
  66                 if (!p) /* The mnt_id field is a relatively new addition */
  67                         return -EOPNOTSUPP;
  68
  69                 p += 8;
  70         }
  71
  72         p += strspn(p, WHITESPACE);
  73         p[strcspn(p, WHITESPACE)] = 0;
  74
  75         return safe_atoi(p, mnt_id);
  76 }
  77
  78
  79 int fd_is_mount_point(int fd, const char *filename, int flags) {
  80         union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT;
  81         int mount_id = -1, mount_id_parent = -1;
  82         bool nosupp = false, check_st_dev = true;
  83         struct stat a, b;
  84         int r;
  85
  86         assert(fd >= 0);
  87         assert(filename);
  88
  89         /* First we will try the name_to_handle_at() syscall, which
  90          * tells us the mount id and an opaque file "handle". It is
  91          * not supported everywhere though (kernel compile-time
  92          * option, not all file systems are hooked up). If it works
  93          * the mount id is usually good enough to tell us whether
  94          * something is a mount point.
  95          *
  96          * If that didn't work we will try to read the mount id from
  97          * /proc/self/fdinfo/<fd>. This is almost as good as
  98          * name_to_handle_at(), however, does not return the
  99          * opaque file handle. The opaque file handle is pretty useful
 100          * to detect the root directory, which we should always
 101          * consider a mount point. Hence we use this only as
 102          * fallback. Exporting the mnt_id in fdinfo is a pretty recent
 103          * kernel addition.
 104          *
 105          * As last fallback we do traditional fstat() based st_dev
 106          * comparisons. This is how things were traditionally done,
 107          * but unionfs breaks this since it exposes file
 108          * systems with a variety of st_dev reported. Also, btrfs
 109          * subvolumes have different st_dev, even though they aren't
 110          * real mounts of their own. */
 111
 112         r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags);
 113         if (r < 0) {
 114                 if (errno == ENOSYS)
 115                         /* This kernel does not support name_to_handle_at()
 116                          * fall back to simpler logic. */
 117                         goto fallback_fdinfo;
 118                 else if (errno == EOPNOTSUPP)
 119                         /* This kernel or file system does not support
 120                          * name_to_handle_at(), hence let's see if the
 121                          * upper fs supports it (in which case it is a
 122                          * mount point), otherwise fallback to the
 123                          * traditional stat() logic */
 124                         nosupp = true;
 125                 else
 126                         return -errno;
 127         }
 128
 129         r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH);
 130         if (r < 0) {
 131                 if (errno == EOPNOTSUPP) {
 132                         if (nosupp)
 133                                 /* Neither parent nor child do name_to_handle_at()?
 134                                    We have no choice but to fall back. */
 135                                 goto fallback_fdinfo;
 136                         else
 137                                 /* The parent can't do name_to_handle_at() but the
 138                                  * directory we are interested in can?
 139                                  * If so, it must be a mount point. */
 140                                 return 1;
 141                 } else
 142                         return -errno;
 143         }
 144
 145         /* The parent can do name_to_handle_at() but the
 146          * directory we are interested in can't? If so, it
 147          * must be a mount point. */
 148         if (nosupp)
 149                 return 1;
 150
 151         /* If the file handle for the directory we are
 152          * interested in and its parent are identical, we
 153          * assume this is the root directory, which is a mount
 154          * point. */
 155
 156         if (h.handle.handle_bytes == h_parent.handle.handle_bytes &&
 157             h.handle.handle_type == h_parent.handle.handle_type &&
 158             memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0)
 159                 return 1;
 160
 161         return mount_id != mount_id_parent;
 162
 163 fallback_fdinfo:
 164         r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
 165         if (r == -EOPNOTSUPP)
 166                 goto fallback_fstat;
 167         if (r < 0)
 168                 return r;
 169
 170         r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
 171         if (r < 0)
 172                 return r;
 173
 174         if (mount_id != mount_id_parent)
 175                 return 1;
 176
 177         /* Hmm, so, the mount ids are the same. This leaves one
 178          * special case though for the root file system. For that,
 179          * let's see if the parent directory has the same inode as we
 180          * are interested in. Hence, let's also do fstat() checks now,
 181          * too, but avoid the st_dev comparisons, since they aren't
 182          * that useful on unionfs mounts. */
 183         check_st_dev = false;
 184
 185 fallback_fstat:
 186         /* yay for fstatat() taking a different set of flags than the other
 187          * _at() above */
 188         if (flags & AT_SYMLINK_FOLLOW)
 189                 flags &= ~AT_SYMLINK_FOLLOW;
 190         else
 191                 flags |= AT_SYMLINK_NOFOLLOW;
 192         if (fstatat(fd, filename, &a, flags) < 0)
 193                 return -errno;
 194
 195         if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
 196                 return -errno;
 197
 198         /* A directory with same device and inode as its parent? Must
 199          * be the root directory */
 200         if (a.st_dev == b.st_dev &&
 201             a.st_ino == b.st_ino)
 202                 return 1;
 203
 204         return check_st_dev && (a.st_dev != b.st_dev);
 205 }
 206
 207 /* flags can be AT_SYMLINK_FOLLOW or 0 */
 208 int path_is_mount_point(const char *t, int flags) {
 209         _cleanup_close_ int fd = -1;
 210         _cleanup_free_ char *canonical = NULL, *parent = NULL;
 211
 212         assert(t);
 213
 214         if (path_equal(t, "/"))
 215                 return 1;
 216
 217         /* we need to resolve symlinks manually, we can't just rely on
 218          * fd_is_mount_point() to do that for us; if we have a structure like
 219          * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
 220          * look at needs to be /usr, not /. */
 221         if (flags & AT_SYMLINK_FOLLOW) {
 222                 canonical = canonicalize_file_name(t);
 223                 if (!canonical)
 224                         return -errno;
 225
 226                 t = canonical;
 227         }
 228
 229         parent = dirname_malloc(t);
 230         if (!parent)
 231                 return -ENOMEM;
 232
 233         fd = openat(AT_FDCWD, parent, O_DIRECTORY|O_CLOEXEC|O_PATH);
 234         if (fd < 0)
 235                 return -errno;
 236
 237         return fd_is_mount_point(fd, basename(t), flags);
 238 }
 239
 240 int umount_recursive(const char *prefix, int flags) {
 241         bool again;
 242         int n = 0, r;
 243
 244         /* Try to umount everything recursively below a
 245          * directory. Also, take care of stacked mounts, and keep
 246          * unmounting them until they are gone. */
 247
 248         do {
 249                 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 250
 251                 again = false;
 252                 r = 0;
 253
 254                 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 255                 if (!proc_self_mountinfo)
 256                         return -errno;
 257
 258                 for (;;) {
 259                         _cleanup_free_ char *path = NULL, *p = NULL;
 260                         int k;
 261
 262                         k = fscanf(proc_self_mountinfo,
 263                                    "%*s "       /* (1) mount id */
 264                                    "%*s "       /* (2) parent id */
 265                                    "%*s "       /* (3) major:minor */
 266                                    "%*s "       /* (4) root */
 267                                    "%ms "       /* (5) mount point */
 268                                    "%*s"        /* (6) mount options */
 269                                    "%*[^-]"     /* (7) optional fields */
 270                                    "- "         /* (8) separator */
 271                                    "%*s "       /* (9) file system type */
 272                                    "%*s"        /* (10) mount source */
 273                                    "%*s"        /* (11) mount options 2 */
 274                                    "%*[^\n]",   /* some rubbish at the end */
 275                                    &path);
 276                         if (k != 1) {
 277                                 if (k == EOF)
 278                                         break;
 279
 280                                 continue;
 281                         }
 282
 283                         r = cunescape(path, UNESCAPE_RELAX, &p);
 284                         if (r < 0)
 285                                 return r;
 286
 287                         if (!path_startswith(p, prefix))
 288                                 continue;
 289
 290                         if (umount2(p, flags) < 0) {
 291                                 r = -errno;
 292                                 continue;
 293                         }
 294
 295                         again = true;
 296                         n++;
 297
 298                         break;
 299                 }
 300
 301         } while (again);
 302
 303         return r ? r : n;
 304 }
 305
 /* Queries the mount flags of the file system that path lives on, using
  * statvfs(). On success stores the f_flag field in *flags and returns 0;
  * on failure returns -errno and leaves *flags untouched. */
 static int get_mount_flags(const char *path, unsigned long *flags) {
         struct statvfs vfs;
         int rc;

         rc = statvfs(path, &vfs);
         if (rc < 0)
                 return -errno;

         *flags = vfs.f_flag;

         return 0;
 }
 314
 315 int bind_remount_recursive(const char *prefix, bool ro) {
 316         _cleanup_set_free_free_ Set *done = NULL;
 317         _cleanup_free_ char *cleaned = NULL;
 318         int r;
 319
 320         /* Recursively remount a directory (and all its submounts)
 321          * read-only or read-write. If the directory is already
 322          * mounted, we reuse the mount and simply mark it
 323          * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
 324          * operation). If it isn't we first make it one. Afterwards we
 325          * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
 326          * submounts we can access, too. When mounts are stacked on
 327          * the same mount point we only care for each individual
 328          * "top-level" mount on each point, as we cannot
 329          * influence/access the underlying mounts anyway. We do not
 330          * have any effect on future submounts that might get
 331          * propagated, they migt be writable. This includes future
 332          * submounts that have been triggered via autofs. */
 333
 334         cleaned = strdup(prefix);
 335         if (!cleaned)
 336                 return -ENOMEM;
 337
 338         path_kill_slashes(cleaned);
 339
 340         done = set_new(&string_hash_ops);
 341         if (!done)
 342                 return -ENOMEM;
 343
 344         for (;;) {
 345                 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 346                 _cleanup_set_free_free_ Set *todo = NULL;
 347                 bool top_autofs = false;
 348                 char *x;
 349                 unsigned long orig_flags;
 350
 351                 todo = set_new(&string_hash_ops);
 352                 if (!todo)
 353                         return -ENOMEM;
 354
 355                 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 356                 if (!proc_self_mountinfo)
 357                         return -errno;
 358
 359                 for (;;) {
 360                         _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
 361                         int k;
 362
 363                         k = fscanf(proc_self_mountinfo,
 364                                    "%*s "       /* (1) mount id */
 365                                    "%*s "       /* (2) parent id */
 366                                    "%*s "       /* (3) major:minor */
 367                                    "%*s "       /* (4) root */
 368                                    "%ms "       /* (5) mount point */
 369                                    "%*s"        /* (6) mount options (superblock) */
 370                                    "%*[^-]"     /* (7) optional fields */
 371                                    "- "         /* (8) separator */
 372                                    "%ms "       /* (9) file system type */
 373                                    "%*s"        /* (10) mount source */
 374                                    "%*s"        /* (11) mount options (bind mount) */
 375                                    "%*[^\n]",   /* some rubbish at the end */
 376                                    &path,
 377                                    &type);
 378                         if (k != 2) {
 379                                 if (k == EOF)
 380                                         break;
 381
 382                                 continue;
 383                         }
 384
 385                         r = cunescape(path, UNESCAPE_RELAX, &p);
 386                         if (r < 0)
 387                                 return r;
 388
 389                         /* Let's ignore autofs mounts.  If they aren't
 390                          * triggered yet, we want to avoid triggering
 391                          * them, as we don't make any guarantees for
 392                          * future submounts anyway.  If they are
 393                          * already triggered, then we will find
 394                          * another entry for this. */
 395                         if (streq(type, "autofs")) {
 396                                 top_autofs = top_autofs || path_equal(cleaned, p);
 397                                 continue;
 398                         }
 399
 400                         if (path_startswith(p, cleaned) &&
 401                             !set_contains(done, p)) {
 402
 403                                 r = set_consume(todo, p);
 404                                 p = NULL;
 405
 406                                 if (r == -EEXIST)
 407                                         continue;
 408                                 if (r < 0)
 409                                         return r;
 410                         }
 411                 }
 412
 413                 /* If we have no submounts to process anymore and if
 414                  * the root is either already done, or an autofs, we
 415                  * are done */
 416                 if (set_isempty(todo) &&
 417                     (top_autofs || set_contains(done, cleaned)))
 418                         return 0;
 419
 420                 if (!set_contains(done, cleaned) &&
 421                     !set_contains(todo, cleaned)) {
 422                         /* The prefix directory itself is not yet a
 423                          * mount, make it one. */
 424                         if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
 425                                 return -errno;
 426
 427                         orig_flags = 0;
 428                         (void) get_mount_flags(cleaned, &orig_flags);
 429                         orig_flags &= ~MS_RDONLY;
 430
 431                         if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 432                                 return -errno;
 433
 434                         x = strdup(cleaned);
 435                         if (!x)
 436                                 return -ENOMEM;
 437
 438                         r = set_consume(done, x);
 439                         if (r < 0)
 440                                 return r;
 441                 }
 442
 443                 while ((x = set_steal_first(todo))) {
 444
 445                         r = set_consume(done, x);
 446                         if (r == -EEXIST || r == 0)
 447                                 continue;
 448                         if (r < 0)
 449                                 return r;
 450
 451                         /* Deal with mount points that are obstructed by a
 452                          * later mount */
 453                         r = path_is_mount_point(x, 0);
 454                         if (r == -ENOENT || r == 0)
 455                                 continue;
 456                         if (r < 0)
 457                                 return r;
 458
 459                         /* Try to reuse the original flag set */
 460                         orig_flags = 0;
 461                         (void) get_mount_flags(x, &orig_flags);
 462                         orig_flags &= ~MS_RDONLY;
 463
 464                         if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 465                                 return -errno;
 466
 467                 }
 468         }
 469 }
 470
 /* Makes the file system mounted at path the new root of the calling
  * process: changes into path, moves that mount onto "/" with MS_MOVE,
  * chroot()s into it and finally changes the working directory to the new
  * "/".
  *
  * Returns 0 on success, or -errno of the first step that failed. Steps
  * completed before the failure are not undone. */
 int mount_move_root(const char *path) {
         assert(path);

         /* Short-circuit evaluation runs the steps strictly in order and
          * stops at the first failure, so errno belongs to the failing call. */
         if (chdir(path) < 0 ||
             mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
             chroot(".") < 0 ||
             chdir("/") < 0)
                 return -errno;

         return 0;
 }
 488
 /* Returns true if fstype names one of the network file systems listed
  * below. A leading "fuse." is stripped before the lookup, so a FUSE
  * variant such as "fuse.sshfs" is matched by its base name. */
 bool fstype_is_network(const char *fstype) {
         /* NUL-separated list of known network file system types */
         static const char network_fstypes[] =
                 "afs\0"
                 "cifs\0"
                 "smbfs\0"
                 "sshfs\0"
                 "ncpfs\0"
                 "ncp\0"
                 "nfs\0"
                 "nfs4\0"
                 "gfs\0"
                 "gfs2\0"
                 "glusterfs\0"
                 "pvfs2\0" /* OrangeFS */
                 "ocfs2\0"
                 ;

         const char *without_fuse = startswith(fstype, "fuse.");

         return nulstr_contains(network_fstypes, without_fuse ? without_fuse : fstype);
 }
 514
 /* Removes all mounts stacked on the mount point path by calling
  * umount2(path, flags) until it fails.
  *
  * If the terminating failure is EINVAL, returns 1 when at least one
  * unmount succeeded and 0 when none did. Any other failure is returned
  * as -errno. */
 int repeat_unmount(const char *path, int flags) {
         bool unmounted_any = false;

         assert(path);

         /* Keep peeling off mounts for as long as umount2() succeeds. */
         while (umount2(path, flags) >= 0)
                 unmounted_any = true;

         /* EINVAL is the expected way for the loop to end; anything else is
          * a real error. */
         return errno == EINVAL ? unmounted_any : -errno;
 }
 535
 /* Maps the file type bits of mode to the path of the matching
  * "inaccessible" node below /run/systemd/inaccessible/.
  *
  * Inaccessible character and block device nodes may not exist (they may
  * fail to be created, because major=0 and minor=0). If the device node is
  * missing, the socket node is returned in its place.
  *
  * Returns a pointer to a constant string, or NULL if the file type is
  * none of the handled ones. */
 const char* mode_to_inaccessible_node(mode_t mode) {
         static const char chr_node[] = "/run/systemd/inaccessible/chr";
         static const char blk_node[] = "/run/systemd/inaccessible/blk";
         static const char sock_node[] = "/run/systemd/inaccessible/sock";
         const mode_t type = mode & S_IFMT;

         if (type == S_IFREG)
                 return "/run/systemd/inaccessible/reg";

         if (type == S_IFDIR)
                 return "/run/systemd/inaccessible/dir";

         if (type == S_IFCHR)
                 return access(chr_node, F_OK) == 0 ? chr_node : sock_node;

         if (type == S_IFBLK)
                 return access(blk_node, F_OK) == 0 ? blk_node : sock_node;

         if (type == S_IFIFO)
                 return "/run/systemd/inaccessible/fifo";

         if (type == S_IFSOCK)
                 return sock_node;

         return NULL;
 }