[thirdparty/systemd.git] / src / basic / mount-util.c

/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <string.h>
#include <sys/mount.h>
#include <sys/statvfs.h>

#include "alloc-util.h"
#include "escape.h"
#include "fd-util.h"
#include "fileio.h"
#include "mount-util.h"
#include "parse-util.h"
#include "path-util.h"
#include "set.h"
#include "stdio-util.h"
#include "string-util.h"
#include "util.h"

static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
        char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
        _cleanup_free_ char *fdinfo = NULL;
        _cleanup_close_ int subfd = -1;
        char *p;
        int r;

        if ((flags & AT_EMPTY_PATH) && isempty(filename))
                xsprintf(path, "/proc/self/fdinfo/%i", fd);
        else {
                subfd = openat(fd, filename, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_PATH);
                if (subfd < 0)
                        return -errno;

                xsprintf(path, "/proc/self/fdinfo/%i", subfd);
        }

        r = read_full_file(path, &fdinfo, NULL);
        if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
                return -EOPNOTSUPP;
        if (r < 0)
                return -errno;

        p = startswith(fdinfo, "mnt_id:");
        if (!p) {
                p = strstr(fdinfo, "\nmnt_id:");
                if (!p) /* The mnt_id field is a relatively new addition */
                        return -EOPNOTSUPP;

                p += 8;
        }

        p += strspn(p, WHITESPACE);
        p[strcspn(p, WHITESPACE)] = 0;

        return safe_atoi(p, mnt_id);
}


/* Checks whether the file "filename", looked up relative to the directory fd, is a mount
 * point. fd plays the role of the parent directory: it is queried with an empty path and
 * AT_EMPTY_PATH and the result is compared against that of the child.
 *
 * flags is handed to name_to_handle_at() and fd_fdinfo_mnt_id() as is; for the fstatat()
 * fallback AT_SYMLINK_FOLLOW is translated into the absence of AT_SYMLINK_NOFOLLOW.
 *
 * Returns 1 if the file is a mount point (the root directory counts as one), 0 if it is
 * not, and a negative errno-style code on failure. */
int fd_is_mount_point(int fd, const char *filename, int flags) {
        union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT;
        int mount_id = -1, mount_id_parent = -1;
        bool nosupp = false, check_st_dev = true;
        struct stat a, b;
        int r;

        assert(fd >= 0);
        assert(filename);

        /* First we will try the name_to_handle_at() syscall, which
         * tells us the mount id and an opaque file "handle". It is
         * not supported everywhere though (kernel compile-time
         * option, not all file systems are hooked up). If it works
         * the mount id is usually good enough to tell us whether
         * something is a mount point.
         *
         * If that didn't work we will try to read the mount id from
         * /proc/self/fdinfo/<fd>. This is almost as good as
         * name_to_handle_at(), however, does not return the
         * opaque file handle. The opaque file handle is pretty useful
         * to detect the root directory, which we should always
         * consider a mount point. Hence we use this only as
         * fallback. Exporting the mnt_id in fdinfo is a pretty recent
         * kernel addition.
         *
         * As last fallback we do traditional fstat() based st_dev
         * comparisons. This is how things were traditionally done,
         * but unionfs breaks this since it exposes file
         * systems with a variety of st_dev reported. Also, btrfs
         * subvolumes have different st_dev, even though they aren't
         * real mounts of their own. */

        r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags);
        if (r < 0) {
                if (errno == ENOSYS)
                        /* This kernel does not support name_to_handle_at()
                         * fall back to simpler logic. */
                        goto fallback_fdinfo;
                else if (errno == EOPNOTSUPP)
                        /* This kernel or file system does not support
                         * name_to_handle_at(), hence let's see if the
                         * upper fs supports it (in which case it is a
                         * mount point), otherwise fallback to the
                         * traditional stat() logic */
                        nosupp = true;
                else
                        return -errno;
        }

        /* Same query for the parent, i.e. the directory fd itself */
        r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH);
        if (r < 0) {
                if (errno == EOPNOTSUPP) {
                        if (nosupp)
                                /* Neither parent nor child do name_to_handle_at()?
                                 * We have no choice but to fall back. */
                                goto fallback_fdinfo;
                        else
                                /* The parent can't do name_to_handle_at() but the
                                 * directory we are interested in can?
                                 * If so, it must be a mount point. */
                                return 1;
                } else
                        return -errno;
        }

        /* The parent can do name_to_handle_at() but the
         * directory we are interested in can't? If so, it
         * must be a mount point. */
        if (nosupp)
                return 1;

        /* If the file handle for the directory we are
         * interested in and its parent are identical, we
         * assume this is the root directory, which is a mount
         * point. */

        if (h.handle.handle_bytes == h_parent.handle.handle_bytes &&
            h.handle.handle_type == h_parent.handle.handle_type &&
            memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0)
                return 1;

        /* Different mount ids for child and parent mean a mount boundary lies in between */
        return mount_id != mount_id_parent;

fallback_fdinfo:
        /* Second attempt: read the mount ids from /proc/self/fdinfo/ */
        r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
        if (r == -EOPNOTSUPP)
                goto fallback_fstat;
        if (r < 0)
                return r;

        r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
        if (r < 0)
                return r;

        if (mount_id != mount_id_parent)
                return 1;

        /* Hmm, so, the mount ids are the same. This leaves one
         * special case though for the root file system. For that,
         * let's see if the parent directory has the same inode as we
         * are interested in. Hence, let's also do fstat() checks now,
         * too, but avoid the st_dev comparisons, since they aren't
         * that useful on unionfs mounts. */
        check_st_dev = false;

fallback_fstat:
        /* yay for fstatat() taking a different set of flags than the other
         * _at() above */
        if (flags & AT_SYMLINK_FOLLOW)
                flags &= ~AT_SYMLINK_FOLLOW;
        else
                flags |= AT_SYMLINK_NOFOLLOW;
        if (fstatat(fd, filename, &a, flags) < 0)
                return -errno;

        if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
                return -errno;

        /* A directory with same device and inode as its parent? Must
         * be the root directory */
        if (a.st_dev == b.st_dev &&
            a.st_ino == b.st_ino)
                return 1;

        /* The st_dev comparison only counts if we did not get here via matching fdinfo mount ids */
        return check_st_dev && (a.st_dev != b.st_dev);
}

/* flags can be AT_SYMLINK_FOLLOW or 0 */
int path_is_mount_point(const char *t, int flags) {
        _cleanup_close_ int fd = -1;
        _cleanup_free_ char *canonical = NULL, *parent = NULL;

        assert(t);

        if (path_equal(t, "/"))
                return 1;

        /* we need to resolve symlinks manually, we can't just rely on
         * fd_is_mount_point() to do that for us; if we have a structure like
         * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
         * look at needs to be /usr, not /. */
        if (flags & AT_SYMLINK_FOLLOW) {
                canonical = canonicalize_file_name(t);
                if (!canonical)
                        return -errno;

                t = canonical;
        }

        parent = dirname_malloc(t);
        if (!parent)
                return -ENOMEM;

        fd = openat(AT_FDCWD, parent, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_PATH);
        if (fd < 0)
                return -errno;

        return fd_is_mount_point(fd, basename(t), flags);
}

/* Unmounts everything mounted at or below prefix, as listed in /proc/self/mountinfo.
 *
 * flags is passed to umount2() unchanged. Returns the number of successful umount2() calls,
 * or, if an unmount failed during the final pass over the mount table, the negative errno of
 * the last such failure. Failing to open the mount table or to unescape an entry aborts
 * immediately with a negative errno-style code. */
int umount_recursive(const char *prefix, int flags) {
        bool again;
        int n = 0, r;

        /* Try to umount everything recursively below a
         * directory. Also, take care of stacked mounts, and keep
         * unmounting them until they are gone. */

        do {
                _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;

                again = false;
                r = 0;

                /* The table is re-read from scratch after every successful unmount */
                proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
                if (!proc_self_mountinfo)
                        return -errno;

                for (;;) {
                        _cleanup_free_ char *path = NULL, *p = NULL;
                        int k;

                        k = fscanf(proc_self_mountinfo,
                                   "%*s "       /* (1) mount id */
                                   "%*s "       /* (2) parent id */
                                   "%*s "       /* (3) major:minor */
                                   "%*s "       /* (4) root */
                                   "%ms "       /* (5) mount point */
                                   "%*s"        /* (6) mount options */
                                   "%*[^-]"     /* (7) optional fields */
                                   "- "         /* (8) separator */
                                   "%*s "       /* (9) file system type */
                                   "%*s"        /* (10) mount source */
                                   "%*s"        /* (11) mount options 2 */
                                   "%*[^\n]",   /* some rubbish at the end */
                                   &path);
                        if (k != 1) {
                                if (k == EOF)
                                        break;

                                continue;
                        }

                        /* Keep the result out of r: r is reserved for umount2() failures that
                         * are reported at the end, and a successful (non-negative) result here
                         * must neither hide such a failure nor end up as the return value. */
                        k = cunescape(path, UNESCAPE_RELAX, &p);
                        if (k < 0)
                                return k;

                        if (!path_startswith(p, prefix))
                                continue;

                        if (umount2(p, flags) < 0) {
                                /* Remember the failure, but keep trying the remaining entries */
                                r = -errno;
                                continue;
                        }

                        again = true;
                        n++;

                        break;
                }

        } while (again);

        return r ? r : n;
}

/* Stores the f_flag field that statvfs() reports for path in *flags.
 * Returns 0 on success; on failure returns -errno and leaves *flags untouched. */
static int get_mount_flags(const char *path, unsigned long *flags) {
        struct statvfs vfs;
        int rc;

        rc = statvfs(path, &vfs);
        if (rc < 0)
                return -errno;

        *flags = vfs.f_flag;

        return 0;
}

/* Remounts prefix and all mounts below it read-only (ro == true) or read-write
 * (ro == false) by means of MS_BIND|MS_REMOUNT.
 *
 * Returns 0 once no unprocessed mounts are left, or a negative errno-style code on
 * failure. Mounts processed before a failure stay remounted. */
int bind_remount_recursive(const char *prefix, bool ro) {
        _cleanup_set_free_free_ Set *done = NULL;
        _cleanup_free_ char *cleaned = NULL;
        int r;

        /* Recursively remount a directory (and all its submounts)
         * read-only or read-write. If the directory is already
         * mounted, we reuse the mount and simply mark it
         * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
         * operation). If it isn't we first make it one. Afterwards we
         * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
         * submounts we can access, too. When mounts are stacked on
         * the same mount point we only care for each individual
         * "top-level" mount on each point, as we cannot
         * influence/access the underlying mounts anyway. We do not
         * have any effect on future submounts that might get
         * propagated, they might be writable. This includes future
         * submounts that have been triggered via autofs. */

        assert(prefix);

        /* All comparisons and mount calls below operate on this normalized copy */
        cleaned = strdup(prefix);
        if (!cleaned)
                return -ENOMEM;

        path_kill_slashes(cleaned);

        /* Paths that have been remounted already */
        done = set_new(&string_hash_ops);
        if (!done)
                return -ENOMEM;

        for (;;) {
                _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
                _cleanup_set_free_free_ Set *todo = NULL;
                bool top_autofs = false;
                char *x;
                unsigned long orig_flags;

                /* Paths found in this pass over the mount table that still need remounting */
                todo = set_new(&string_hash_ops);
                if (!todo)
                        return -ENOMEM;

                proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
                if (!proc_self_mountinfo)
                        return -errno;

                for (;;) {
                        _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
                        int k;

                        k = fscanf(proc_self_mountinfo,
                                   "%*s "       /* (1) mount id */
                                   "%*s "       /* (2) parent id */
                                   "%*s "       /* (3) major:minor */
                                   "%*s "       /* (4) root */
                                   "%ms "       /* (5) mount point */
                                   "%*s"        /* (6) mount options (superblock) */
                                   "%*[^-]"     /* (7) optional fields */
                                   "- "         /* (8) separator */
                                   "%ms "       /* (9) file system type */
                                   "%*s"        /* (10) mount source */
                                   "%*s"        /* (11) mount options (bind mount) */
                                   "%*[^\n]",   /* some rubbish at the end */
                                   &path,
                                   &type);
                        if (k != 2) {
                                if (k == EOF)
                                        break;

                                continue;
                        }

                        r = cunescape(path, UNESCAPE_RELAX, &p);
                        if (r < 0)
                                return r;

                        /* Let's ignore autofs mounts.  If they aren't
                         * triggered yet, we want to avoid triggering
                         * them, as we don't make any guarantees for
                         * future submounts anyway.  If they are
                         * already triggered, then we will find
                         * another entry for this. */
                        if (streq(type, "autofs")) {
                                top_autofs = top_autofs || path_equal(cleaned, p);
                                continue;
                        }

                        if (path_startswith(p, cleaned) &&
                            !set_contains(done, p)) {

                                /* The string is handed to set_consume(); drop our reference so
                                 * that the cleanup handler does not free it a second time. */
                                r = set_consume(todo, p);
                                p = NULL;

                                if (r == -EEXIST)
                                        continue;
                                if (r < 0)
                                        return r;
                        }
                }

                /* If we have no submounts to process anymore and if
                 * the root is either already done, or an autofs, we
                 * are done */
                if (set_isempty(todo) &&
                    (top_autofs || set_contains(done, cleaned)))
                        return 0;

                if (!set_contains(done, cleaned) &&
                    !set_contains(todo, cleaned)) {
                        /* The prefix directory itself is not yet a
                         * mount, make it one. */
                        if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
                                return -errno;

                        /* Best effort: if the flags cannot be read we continue with none */
                        orig_flags = 0;
                        (void) get_mount_flags(cleaned, &orig_flags);
                        orig_flags &= ~MS_RDONLY;

                        /* Remount the very path we just bind mounted and queried */
                        if (mount(NULL, cleaned, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
                                return -errno;

                        x = strdup(cleaned);
                        if (!x)
                                return -ENOMEM;

                        r = set_consume(done, x);
                        if (r < 0)
                                return r;
                }

                while ((x = set_steal_first(todo))) {

                        /* Only entries newly added to "done" are remounted below */
                        r = set_consume(done, x);
                        if (r == -EEXIST || r == 0)
                                continue;
                        if (r < 0)
                                return r;

                        /* Try to reuse the original flag set, but
                         * don't care for errors, in case of
                         * obstructed mounts */
                        orig_flags = 0;
                        (void) get_mount_flags(x, &orig_flags);
                        orig_flags &= ~MS_RDONLY;

                        if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {

                                /* Deal with mount points that are
                                 * obstructed by a later mount */

                                if (errno != ENOENT)
                                        return -errno;
                        }

                }
        }
}

/* Makes the mount at path the new root: changes into path, moves that mount onto "/" with
 * MS_MOVE, chroot()s into the current directory and finally changes to "/".
 *
 * Returns 0 on success, or -errno of the first step that failed; later steps are not
 * attempted once one fails. */
int mount_move_root(const char *path) {
        assert(path);

        /* Short-circuit evaluation keeps the steps in order and stops at the first failure,
         * so errno still belongs to the call that failed. */
        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}
Commit	Line	Data
4349cd7c LP	1	/-- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil --/
	2
	3	/***
	4	This file is part of systemd.
	5
	6	Copyright 2010 Lennart Poettering
	7
	8	systemd is free software; you can redistribute it and/or modify it
	9	under the terms of the GNU Lesser General Public License as published by
	10	the Free Software Foundation; either version 2.1 of the License, or
	11	(at your option) any later version.
	12
	13	systemd is distributed in the hope that it will be useful, but
	14	WITHOUT ANY WARRANTY; without even the implied warranty of
	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	16	Lesser General Public License for more details.
	17
	18	You should have received a copy of the GNU Lesser General Public License
	19	along with systemd; If not, see <http://www.gnu.org/licenses/>.
	20	***/
	21
	22	#include <string.h>
	23	#include <sys/mount.h>
	24	#include <sys/statvfs.h>
	25
b5efdb8a	26	#include "alloc-util.h"
4349cd7c LP	27	#include "escape.h"
	28	#include "fd-util.h"
	29	#include "fileio.h"
	30	#include "mount-util.h"
	31	#include "parse-util.h"
	32	#include "path-util.h"
	33	#include "set.h"
15a5e950	34	#include "stdio-util.h"
4349cd7c LP	35	#include "string-util.h"
	36	#include "util.h"
	37
	38	static int fd_fdinfo_mnt_id(int fd, const char filename, int flags, int mnt_id) {
	39	char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
	40	_cleanup_free_ char *fdinfo = NULL;
	41	_cleanup_close_ int subfd = -1;
	42	char *p;
	43	int r;
	44
	45	if ((flags & AT_EMPTY_PATH) && isempty(filename))
	46	xsprintf(path, "/proc/self/fdinfo/%i", fd);
	47	else {
	48	subfd = openat(fd, filename, O_RDONLY\|O_CLOEXEC\|O_NOCTTY\|O_PATH);
	49	if (subfd < 0)
	50	return -errno;
	51
	52	xsprintf(path, "/proc/self/fdinfo/%i", subfd);
	53	}
	54
	55	r = read_full_file(path, &fdinfo, NULL);
	56	if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
	57	return -EOPNOTSUPP;
	58	if (r < 0)
	59	return -errno;
	60
	61	p = startswith(fdinfo, "mnt_id:");
	62	if (!p) {
	63	p = strstr(fdinfo, "\nmnt_id:");
	64	if (!p) /* The mnt_id field is a relatively new addition */
	65	return -EOPNOTSUPP;
	66
	67	p += 8;
	68	}
	69
	70	p += strspn(p, WHITESPACE);
	71	p[strcspn(p, WHITESPACE)] = 0;
	72
	73	return safe_atoi(p, mnt_id);
	74	}
	75
	76
	77	int fd_is_mount_point(int fd, const char *filename, int flags) {
	78	union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT;
	79	int mount_id = -1, mount_id_parent = -1;
	80	bool nosupp = false, check_st_dev = true;
	81	struct stat a, b;
	82	int r;
	83
	84	assert(fd >= 0);
	85	assert(filename);
	86
	87	/* First we will try the name_to_handle_at() syscall, which
	88	* tells us the mount id and an opaque file "handle". It is
	89	* not supported everywhere though (kernel compile-time
	90	* option, not all file systems are hooked up). If it works
	91	* the mount id is usually good enough to tell us whether
	92	* something is a mount point.
	93	*
	94	* If that didn't work we will try to read the mount id from
	95	* /proc/self/fdinfo/<fd>. This is almost as good as
	96	* name_to_handle_at(), however, does not return the
	97	* opaque file handle. The opaque file handle is pretty useful
	98	* to detect the root directory, which we should always
99	* consider a mount point. Hence we use this only as
100	* fallback. Exporting the mnt_id in fdinfo is a pretty recent
101	* kernel addition.
102	*
103	* As last fallback we do traditional fstat() based st_dev
104	* comparisons. This is how things were traditionally done,
105	* but unionfs breaks breaks this since it exposes file
106	* systems with a variety of st_dev reported. Also, btrfs
107	* subvolumes have different st_dev, even though they aren't
108	* real mounts of their own. */
109
110	r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags);
111	if (r < 0) {
112	if (errno == ENOSYS)
113	/* This kernel does not support name_to_handle_at()
114	* fall back to simpler logic. */
115	goto fallback_fdinfo;
116	else if (errno == EOPNOTSUPP)
117	/* This kernel or file system does not support
118	* name_to_handle_at(), hence let's see if the
119	* upper fs supports it (in which case it is a
120	* mount point), otherwise fallback to the
121	* traditional stat() logic */
122	nosupp = true;
123	else
124	return -errno;
125	}
126
127	r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH);
128	if (r < 0) {
129	if (errno == EOPNOTSUPP) {
130	if (nosupp)
131	/* Neither parent nor child do name_to_handle_at()?
132	We have no choice but to fall back. */
133	goto fallback_fdinfo;
134	else
135	/* The parent can't do name_to_handle_at() but the
136	* directory we are interested in can?
137	* If so, it must be a mount point. */
138	return 1;
139	} else
140	return -errno;
141	}
142
143	/* The parent can do name_to_handle_at() but the
144	* directory we are interested in can't? If so, it
145	* must be a mount point. */
146	if (nosupp)
147	return 1;
148
149	/* If the file handle for the directory we are
150	* interested in and its parent are identical, we
151	* assume this is the root directory, which is a mount
152	* point. */
153
154	if (h.handle.handle_bytes == h_parent.handle.handle_bytes &&
155	h.handle.handle_type == h_parent.handle.handle_type &&
156	memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0)
157	return 1;
158
159	return mount_id != mount_id_parent;
160
161	fallback_fdinfo:
162	r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
163	if (r == -EOPNOTSUPP)
164	goto fallback_fstat;
165	if (r < 0)
166	return r;
167
168	r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
169	if (r < 0)
170	return r;
171
172	if (mount_id != mount_id_parent)
173	return 1;
174
175	/* Hmm, so, the mount ids are the same. This leaves one
176	* special case though for the root file system. For that,
177	* let's see if the parent directory has the same inode as we
178	* are interested in. Hence, let's also do fstat() checks now,
179	* too, but avoid the st_dev comparisons, since they aren't
180	* that useful on unionfs mounts. */
181	check_st_dev = false;
182
183	fallback_fstat:
184	/* yay for fstatat() taking a different set of flags than the other
185	* _at() above */
186	if (flags & AT_SYMLINK_FOLLOW)
187	flags &= ~AT_SYMLINK_FOLLOW;
188	else
189	flags \|= AT_SYMLINK_NOFOLLOW;
190	if (fstatat(fd, filename, &a, flags) < 0)
191	return -errno;
192
193	if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
194	return -errno;
195
196	/* A directory with same device and inode as its parent? Must
197	* be the root directory */
198	if (a.st_dev == b.st_dev &&
199	a.st_ino == b.st_ino)
200	return 1;
201
202	return check_st_dev && (a.st_dev != b.st_dev);
203	}
204
205	/* flags can be AT_SYMLINK_FOLLOW or 0 */
206	int path_is_mount_point(const char *t, int flags) {
207	_cleanup_close_ int fd = -1;
208	_cleanup_free_ char canonical = NULL, parent = NULL;
209
210	assert(t);
211
212	if (path_equal(t, "/"))
213	return 1;
214
215	/* we need to resolve symlinks manually, we can't just rely on
216	* fd_is_mount_point() to do that for us; if we have a structure like
217	* /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
218	* look at needs to be /usr, not /. */
219	if (flags & AT_SYMLINK_FOLLOW) {
220	canonical = canonicalize_file_name(t);
221	if (!canonical)
222	return -errno;
223
224	t = canonical;
225	}
226
227	parent = dirname_malloc(t);
228	if (!parent)
229	return -ENOMEM;
230
231	fd = openat(AT_FDCWD, parent, O_RDONLY\|O_NONBLOCK\|O_DIRECTORY\|O_CLOEXEC\|O_PATH);
232	if (fd < 0)
233	return -errno;
234
235	return fd_is_mount_point(fd, basename(t), flags);
236	}
237
238	int umount_recursive(const char *prefix, int flags) {
239	bool again;
240	int n = 0, r;
241
242	/* Try to umount everything recursively below a
243	* directory. Also, take care of stacked mounts, and keep
244	* unmounting them until they are gone. */
245
246	do {
247	_cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
248
249	again = false;
250	r = 0;
251
252	proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
253	if (!proc_self_mountinfo)
254	return -errno;
255
256	for (;;) {
257	_cleanup_free_ char path = NULL, p = NULL;
258	int k;
259
260	k = fscanf(proc_self_mountinfo,
261	"%s " / (1) mount id */
262	"%s " / (2) parent id */
263	"%s " / (3) major:minor */
264	"%s " / (4) root */
265	"%ms " /* (5) mount point */
266	"%s" / (6) mount options */
267	"%[^-]" / (7) optional fields */
268	"- " /* (8) separator */
269	"%s " / (9) file system type */
270	"%s" / (10) mount source */
271	"%s" / (11) mount options 2 */
272	"%[^\n]", / some rubbish at the end */
273	&path);
274	if (k != 1) {
275	if (k == EOF)
276	break;
277
278	continue;
279	}
280
281	r = cunescape(path, UNESCAPE_RELAX, &p);
282	if (r < 0)
283	return r;
284
285	if (!path_startswith(p, prefix))
286	continue;
287
288	if (umount2(p, flags) < 0) {
289	r = -errno;
290	continue;
291	}
292
293	again = true;
294	n++;
295
296	break;
297	}
298
299	} while (again);
300
301	return r ? r : n;
302	}
303
304	static int get_mount_flags(const char path, unsigned long flags) {
305	struct statvfs buf;
306
307	if (statvfs(path, &buf) < 0)
308	return -errno;
309	*flags = buf.f_flag;
310	return 0;
311	}
312
313	int bind_remount_recursive(const char *prefix, bool ro) {
314	_cleanup_set_free_free_ Set *done = NULL;
315	_cleanup_free_ char *cleaned = NULL;
316	int r;
317
318	/* Recursively remount a directory (and all its submounts)
319	* read-only or read-write. If the directory is already
320	* mounted, we reuse the mount and simply mark it
321	* MS_BIND\|MS_RDONLY (or remove the MS_RDONLY for read-write
322	* operation). If it isn't we first make it one. Afterwards we
323	* apply MS_BIND\|MS_RDONLY (or remove MS_RDONLY) to all
324	* submounts we can access, too. When mounts are stacked on
325	* the same mount point we only care for each individual
326	* "top-level" mount on each point, as we cannot
327	* influence/access the underlying mounts anyway. We do not
328	* have any effect on future submounts that might get
329	* propagated, they migt be writable. This includes future
330	* submounts that have been triggered via autofs. */
331
332	cleaned = strdup(prefix);
333	if (!cleaned)
334	return -ENOMEM;
335
336	path_kill_slashes(cleaned);
337
338	done = set_new(&string_hash_ops);
339	if (!done)
340	return -ENOMEM;
341
342	for (;;) {
343	_cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
344	_cleanup_set_free_free_ Set *todo = NULL;
345	bool top_autofs = false;
346	char *x;
347	unsigned long orig_flags;
348
349	todo = set_new(&string_hash_ops);
350	if (!todo)
351	return -ENOMEM;
352
353	proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
354	if (!proc_self_mountinfo)
355	return -errno;
356
357	for (;;) {
358	_cleanup_free_ char path = NULL, p = NULL, *type = NULL;
359	int k;
360
361	k = fscanf(proc_self_mountinfo,
362	"%s " / (1) mount id */
363	"%s " / (2) parent id */
364	"%s " / (3) major:minor */
365	"%s " / (4) root */
366	"%ms " /* (5) mount point */
367	"%s" / (6) mount options (superblock) */
368	"%[^-]" / (7) optional fields */
369	"- " /* (8) separator */
370	"%ms " /* (9) file system type */
371	"%s" / (10) mount source */
372	"%s" / (11) mount options (bind mount) */
373	"%[^\n]", / some rubbish at the end */
374	&path,
375	&type);
376	if (k != 2) {
377	if (k == EOF)
378	break;
379
380	continue;
381	}
382
383	r = cunescape(path, UNESCAPE_RELAX, &p);
384	if (r < 0)
385	return r;
386
387	/* Let's ignore autofs mounts. If they aren't
388	* triggered yet, we want to avoid triggering
389	* them, as we don't make any guarantees for
390	* future submounts anyway. If they are
391	* already triggered, then we will find
392	* another entry for this. */
393	if (streq(type, "autofs")) {
394	top_autofs = top_autofs \|\| path_equal(cleaned, p);
395	continue;
396	}
397
398	if (path_startswith(p, cleaned) &&
399	!set_contains(done, p)) {
400
401	r = set_consume(todo, p);
402	p = NULL;
403
404	if (r == -EEXIST)
405	continue;
406	if (r < 0)
407	return r;
408	}
409	}
410
411	/* If we have no submounts to process anymore and if
412	* the root is either already done, or an autofs, we
413	* are done */
414	if (set_isempty(todo) &&
415	(top_autofs \|\| set_contains(done, cleaned)))
416	return 0;
417
418	if (!set_contains(done, cleaned) &&
419	!set_contains(todo, cleaned)) {
420	/* The prefix directory itself is not yet a
421	* mount, make it one. */
422	if (mount(cleaned, cleaned, NULL, MS_BIND\|MS_REC, NULL) < 0)
423	return -errno;
424
425	orig_flags = 0;
426	(void) get_mount_flags(cleaned, &orig_flags);
427	orig_flags &= ~MS_RDONLY;
428
429	if (mount(NULL, prefix, NULL, orig_flags\|MS_BIND\|MS_REMOUNT\|(ro ? MS_RDONLY : 0), NULL) < 0)
430	return -errno;
431
432	x = strdup(cleaned);
433	if (!x)
434	return -ENOMEM;
435
436	r = set_consume(done, x);
437	if (r < 0)
438	return r;
439	}
440
441	while ((x = set_steal_first(todo))) {
442
443	r = set_consume(done, x);
444	if (r == -EEXIST \|\| r == 0)
445	continue;
446	if (r < 0)
447	return r;
448
449	/* Try to reuse the original flag set, but
450	* don't care for errors, in case of
451	* obstructed mounts */
452	orig_flags = 0;
453	(void) get_mount_flags(x, &orig_flags);
454	orig_flags &= ~MS_RDONLY;
455
456	if (mount(NULL, x, NULL, orig_flags\|MS_BIND\|MS_REMOUNT\|(ro ? MS_RDONLY : 0), NULL) < 0) {
457
458	/* Deal with mount points that are
459	* obstructed by a later mount */
460
461	if (errno != ENOENT)
462	return -errno;
463	}
464
465	}
466	}
467	}
468
469	int mount_move_root(const char *path) {
470	assert(path);
471
472	if (chdir(path) < 0)
473	return -errno;
474
475	if (mount(path, "/", NULL, MS_MOVE, NULL) < 0)
476	return -errno;
477
478	if (chroot(".") < 0)
479	return -errno;
480
481	if (chdir("/") < 0)
482	return -errno;
483
484	return 0;
485	}