src/basic/mount-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <string.h>
  23 #include <sys/mount.h>
  24 #include <sys/statvfs.h>
  25
  26 #include "escape.h"
  27 #include "fd-util.h"
  28 #include "fileio.h"
  29 #include "mount-util.h"
  30 #include "parse-util.h"
  31 #include "path-util.h"
  32 #include "set.h"
  33 #include "stdio-util.h"
  34 #include "string-util.h"
  35 #include "util.h"
  36
  37 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
  38         char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
  39         _cleanup_free_ char *fdinfo = NULL;
  40         _cleanup_close_ int subfd = -1;
  41         char *p;
  42         int r;
  43
  44         if ((flags & AT_EMPTY_PATH) && isempty(filename))
  45                 xsprintf(path, "/proc/self/fdinfo/%i", fd);
  46         else {
  47                 subfd = openat(fd, filename, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_PATH);
  48                 if (subfd < 0)
  49                         return -errno;
  50
  51                 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
  52         }
  53
  54         r = read_full_file(path, &fdinfo, NULL);
  55         if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
  56                 return -EOPNOTSUPP;
  57         if (r < 0)
  58                 return -errno;
  59
  60         p = startswith(fdinfo, "mnt_id:");
  61         if (!p) {
  62                 p = strstr(fdinfo, "\nmnt_id:");
  63                 if (!p) /* The mnt_id field is a relatively new addition */
  64                         return -EOPNOTSUPP;
  65
  66                 p += 8;
  67         }
  68
  69         p += strspn(p, WHITESPACE);
  70         p[strcspn(p, WHITESPACE)] = 0;
  71
  72         return safe_atoi(p, mnt_id);
  73 }
  74
  75
  76 int fd_is_mount_point(int fd, const char *filename, int flags) {
  77         union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT;
  78         int mount_id = -1, mount_id_parent = -1;
  79         bool nosupp = false, check_st_dev = true;
  80         struct stat a, b;
  81         int r;
  82
  83         assert(fd >= 0);
  84         assert(filename);
  85
  86         /* First we will try the name_to_handle_at() syscall, which
  87          * tells us the mount id and an opaque file "handle". It is
  88          * not supported everywhere though (kernel compile-time
  89          * option, not all file systems are hooked up). If it works
  90          * the mount id is usually good enough to tell us whether
  91          * something is a mount point.
  92          *
  93          * If that didn't work we will try to read the mount id from
  94          * /proc/self/fdinfo/<fd>. This is almost as good as
  95          * name_to_handle_at(), however, does not return the
  96          * opaque file handle. The opaque file handle is pretty useful
  97          * to detect the root directory, which we should always
  98          * consider a mount point. Hence we use this only as
  99          * fallback. Exporting the mnt_id in fdinfo is a pretty recent
 100          * kernel addition.
 101          *
 102          * As last fallback we do traditional fstat() based st_dev
 103          * comparisons. This is how things were traditionally done,
 104          * but unionfs breaks breaks this since it exposes file
 105          * systems with a variety of st_dev reported. Also, btrfs
 106          * subvolumes have different st_dev, even though they aren't
 107          * real mounts of their own. */
 108
 109         r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags);
 110         if (r < 0) {
 111                 if (errno == ENOSYS)
 112                         /* This kernel does not support name_to_handle_at()
 113                          * fall back to simpler logic. */
 114                         goto fallback_fdinfo;
 115                 else if (errno == EOPNOTSUPP)
 116                         /* This kernel or file system does not support
 117                          * name_to_handle_at(), hence let's see if the
 118                          * upper fs supports it (in which case it is a
 119                          * mount point), otherwise fallback to the
 120                          * traditional stat() logic */
 121                         nosupp = true;
 122                 else
 123                         return -errno;
 124         }
 125
 126         r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH);
 127         if (r < 0) {
 128                 if (errno == EOPNOTSUPP) {
 129                         if (nosupp)
 130                                 /* Neither parent nor child do name_to_handle_at()?
 131                                    We have no choice but to fall back. */
 132                                 goto fallback_fdinfo;
 133                         else
 134                                 /* The parent can't do name_to_handle_at() but the
 135                                  * directory we are interested in can?
 136                                  * If so, it must be a mount point. */
 137                                 return 1;
 138                 } else
 139                         return -errno;
 140         }
 141
 142         /* The parent can do name_to_handle_at() but the
 143          * directory we are interested in can't? If so, it
 144          * must be a mount point. */
 145         if (nosupp)
 146                 return 1;
 147
 148         /* If the file handle for the directory we are
 149          * interested in and its parent are identical, we
 150          * assume this is the root directory, which is a mount
 151          * point. */
 152
 153         if (h.handle.handle_bytes == h_parent.handle.handle_bytes &&
 154             h.handle.handle_type == h_parent.handle.handle_type &&
 155             memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0)
 156                 return 1;
 157
 158         return mount_id != mount_id_parent;
 159
 160 fallback_fdinfo:
 161         r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
 162         if (r == -EOPNOTSUPP)
 163                 goto fallback_fstat;
 164         if (r < 0)
 165                 return r;
 166
 167         r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
 168         if (r < 0)
 169                 return r;
 170
 171         if (mount_id != mount_id_parent)
 172                 return 1;
 173
 174         /* Hmm, so, the mount ids are the same. This leaves one
 175          * special case though for the root file system. For that,
 176          * let's see if the parent directory has the same inode as we
 177          * are interested in. Hence, let's also do fstat() checks now,
 178          * too, but avoid the st_dev comparisons, since they aren't
 179          * that useful on unionfs mounts. */
 180         check_st_dev = false;
 181
 182 fallback_fstat:
 183         /* yay for fstatat() taking a different set of flags than the other
 184          * _at() above */
 185         if (flags & AT_SYMLINK_FOLLOW)
 186                 flags &= ~AT_SYMLINK_FOLLOW;
 187         else
 188                 flags |= AT_SYMLINK_NOFOLLOW;
 189         if (fstatat(fd, filename, &a, flags) < 0)
 190                 return -errno;
 191
 192         if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
 193                 return -errno;
 194
 195         /* A directory with same device and inode as its parent? Must
 196          * be the root directory */
 197         if (a.st_dev == b.st_dev &&
 198             a.st_ino == b.st_ino)
 199                 return 1;
 200
 201         return check_st_dev && (a.st_dev != b.st_dev);
 202 }
 203
 204 /* flags can be AT_SYMLINK_FOLLOW or 0 */
 205 int path_is_mount_point(const char *t, int flags) {
 206         _cleanup_close_ int fd = -1;
 207         _cleanup_free_ char *canonical = NULL, *parent = NULL;
 208
 209         assert(t);
 210
 211         if (path_equal(t, "/"))
 212                 return 1;
 213
 214         /* we need to resolve symlinks manually, we can't just rely on
 215          * fd_is_mount_point() to do that for us; if we have a structure like
 216          * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
 217          * look at needs to be /usr, not /. */
 218         if (flags & AT_SYMLINK_FOLLOW) {
 219                 canonical = canonicalize_file_name(t);
 220                 if (!canonical)
 221                         return -errno;
 222
 223                 t = canonical;
 224         }
 225
 226         parent = dirname_malloc(t);
 227         if (!parent)
 228                 return -ENOMEM;
 229
 230         fd = openat(AT_FDCWD, parent, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_PATH);
 231         if (fd < 0)
 232                 return -errno;
 233
 234         return fd_is_mount_point(fd, basename(t), flags);
 235 }
 236
 237 int umount_recursive(const char *prefix, int flags) {
 238         bool again;
 239         int n = 0, r;
 240
 241         /* Try to umount everything recursively below a
 242          * directory. Also, take care of stacked mounts, and keep
 243          * unmounting them until they are gone. */
 244
 245         do {
 246                 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 247
 248                 again = false;
 249                 r = 0;
 250
 251                 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 252                 if (!proc_self_mountinfo)
 253                         return -errno;
 254
 255                 for (;;) {
 256                         _cleanup_free_ char *path = NULL, *p = NULL;
 257                         int k;
 258
 259                         k = fscanf(proc_self_mountinfo,
 260                                    "%*s "       /* (1) mount id */
 261                                    "%*s "       /* (2) parent id */
 262                                    "%*s "       /* (3) major:minor */
 263                                    "%*s "       /* (4) root */
 264                                    "%ms "       /* (5) mount point */
 265                                    "%*s"        /* (6) mount options */
 266                                    "%*[^-]"     /* (7) optional fields */
 267                                    "- "         /* (8) separator */
 268                                    "%*s "       /* (9) file system type */
 269                                    "%*s"        /* (10) mount source */
 270                                    "%*s"        /* (11) mount options 2 */
 271                                    "%*[^\n]",   /* some rubbish at the end */
 272                                    &path);
 273                         if (k != 1) {
 274                                 if (k == EOF)
 275                                         break;
 276
 277                                 continue;
 278                         }
 279
 280                         r = cunescape(path, UNESCAPE_RELAX, &p);
 281                         if (r < 0)
 282                                 return r;
 283
 284                         if (!path_startswith(p, prefix))
 285                                 continue;
 286
 287                         if (umount2(p, flags) < 0) {
 288                                 r = -errno;
 289                                 continue;
 290                         }
 291
 292                         again = true;
 293                         n++;
 294
 295                         break;
 296                 }
 297
 298         } while (again);
 299
 300         return r ? r : n;
 301 }
 302
 303 static int get_mount_flags(const char *path, unsigned long *flags) {
 304         struct statvfs buf;
 305
 306         if (statvfs(path, &buf) < 0)
 307                 return -errno;
 308         *flags = buf.f_flag;
 309         return 0;
 310 }
 311
 312 int bind_remount_recursive(const char *prefix, bool ro) {
 313         _cleanup_set_free_free_ Set *done = NULL;
 314         _cleanup_free_ char *cleaned = NULL;
 315         int r;
 316
 317         /* Recursively remount a directory (and all its submounts)
 318          * read-only or read-write. If the directory is already
 319          * mounted, we reuse the mount and simply mark it
 320          * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
 321          * operation). If it isn't we first make it one. Afterwards we
 322          * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
 323          * submounts we can access, too. When mounts are stacked on
 324          * the same mount point we only care for each individual
 325          * "top-level" mount on each point, as we cannot
 326          * influence/access the underlying mounts anyway. We do not
 327          * have any effect on future submounts that might get
 328          * propagated, they migt be writable. This includes future
 329          * submounts that have been triggered via autofs. */
 330
 331         cleaned = strdup(prefix);
 332         if (!cleaned)
 333                 return -ENOMEM;
 334
 335         path_kill_slashes(cleaned);
 336
 337         done = set_new(&string_hash_ops);
 338         if (!done)
 339                 return -ENOMEM;
 340
 341         for (;;) {
 342                 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 343                 _cleanup_set_free_free_ Set *todo = NULL;
 344                 bool top_autofs = false;
 345                 char *x;
 346                 unsigned long orig_flags;
 347
 348                 todo = set_new(&string_hash_ops);
 349                 if (!todo)
 350                         return -ENOMEM;
 351
 352                 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 353                 if (!proc_self_mountinfo)
 354                         return -errno;
 355
 356                 for (;;) {
 357                         _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
 358                         int k;
 359
 360                         k = fscanf(proc_self_mountinfo,
 361                                    "%*s "       /* (1) mount id */
 362                                    "%*s "       /* (2) parent id */
 363                                    "%*s "       /* (3) major:minor */
 364                                    "%*s "       /* (4) root */
 365                                    "%ms "       /* (5) mount point */
 366                                    "%*s"        /* (6) mount options (superblock) */
 367                                    "%*[^-]"     /* (7) optional fields */
 368                                    "- "         /* (8) separator */
 369                                    "%ms "       /* (9) file system type */
 370                                    "%*s"        /* (10) mount source */
 371                                    "%*s"        /* (11) mount options (bind mount) */
 372                                    "%*[^\n]",   /* some rubbish at the end */
 373                                    &path,
 374                                    &type);
 375                         if (k != 2) {
 376                                 if (k == EOF)
 377                                         break;
 378
 379                                 continue;
 380                         }
 381
 382                         r = cunescape(path, UNESCAPE_RELAX, &p);
 383                         if (r < 0)
 384                                 return r;
 385
 386                         /* Let's ignore autofs mounts.  If they aren't
 387                          * triggered yet, we want to avoid triggering
 388                          * them, as we don't make any guarantees for
 389                          * future submounts anyway.  If they are
 390                          * already triggered, then we will find
 391                          * another entry for this. */
 392                         if (streq(type, "autofs")) {
 393                                 top_autofs = top_autofs || path_equal(cleaned, p);
 394                                 continue;
 395                         }
 396
 397                         if (path_startswith(p, cleaned) &&
 398                             !set_contains(done, p)) {
 399
 400                                 r = set_consume(todo, p);
 401                                 p = NULL;
 402
 403                                 if (r == -EEXIST)
 404                                         continue;
 405                                 if (r < 0)
 406                                         return r;
 407                         }
 408                 }
 409
 410                 /* If we have no submounts to process anymore and if
 411                  * the root is either already done, or an autofs, we
 412                  * are done */
 413                 if (set_isempty(todo) &&
 414                     (top_autofs || set_contains(done, cleaned)))
 415                         return 0;
 416
 417                 if (!set_contains(done, cleaned) &&
 418                     !set_contains(todo, cleaned)) {
 419                         /* The prefix directory itself is not yet a
 420                          * mount, make it one. */
 421                         if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
 422                                 return -errno;
 423
 424                         orig_flags = 0;
 425                         (void) get_mount_flags(cleaned, &orig_flags);
 426                         orig_flags &= ~MS_RDONLY;
 427
 428                         if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 429                                 return -errno;
 430
 431                         x = strdup(cleaned);
 432                         if (!x)
 433                                 return -ENOMEM;
 434
 435                         r = set_consume(done, x);
 436                         if (r < 0)
 437                                 return r;
 438                 }
 439
 440                 while ((x = set_steal_first(todo))) {
 441
 442                         r = set_consume(done, x);
 443                         if (r == -EEXIST || r == 0)
 444                                 continue;
 445                         if (r < 0)
 446                                 return r;
 447
 448                         /* Try to reuse the original flag set, but
 449                          * don't care for errors, in case of
 450                          * obstructed mounts */
 451                         orig_flags = 0;
 452                         (void) get_mount_flags(x, &orig_flags);
 453                         orig_flags &= ~MS_RDONLY;
 454
 455                         if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {
 456
 457                                 /* Deal with mount points that are
 458                                  * obstructed by a later mount */
 459
 460                                 if (errno != ENOENT)
 461                                         return -errno;
 462                         }
 463
 464                 }
 465         }
 466 }
 467
 468 int mount_move_root(const char *path) {
 469         assert(path);
 470
 471         if (chdir(path) < 0)
 472                 return -errno;
 473
 474         if (mount(path, "/", NULL, MS_MOVE, NULL) < 0)
 475                 return -errno;
 476
 477         if (chroot(".") < 0)
 478                 return -errno;
 479
 480         if (chdir("/") < 0)
 481                 return -errno;
 482
 483         return 0;
 484 }