]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/mount-util.c
macro.h: move definition of MODE_INVALID to parse-util.h
[thirdparty/systemd.git] / src / basic / mount-util.c
CommitLineData
4349cd7c
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <string.h>
23#include <sys/mount.h>
24#include <sys/statvfs.h>
25
26#include "escape.h"
27#include "fd-util.h"
28#include "fileio.h"
29#include "mount-util.h"
30#include "parse-util.h"
31#include "path-util.h"
32#include "set.h"
33#include "string-util.h"
34#include "util.h"
35
36static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
37 char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
38 _cleanup_free_ char *fdinfo = NULL;
39 _cleanup_close_ int subfd = -1;
40 char *p;
41 int r;
42
43 if ((flags & AT_EMPTY_PATH) && isempty(filename))
44 xsprintf(path, "/proc/self/fdinfo/%i", fd);
45 else {
46 subfd = openat(fd, filename, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_PATH);
47 if (subfd < 0)
48 return -errno;
49
50 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
51 }
52
53 r = read_full_file(path, &fdinfo, NULL);
54 if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
55 return -EOPNOTSUPP;
56 if (r < 0)
57 return -errno;
58
59 p = startswith(fdinfo, "mnt_id:");
60 if (!p) {
61 p = strstr(fdinfo, "\nmnt_id:");
62 if (!p) /* The mnt_id field is a relatively new addition */
63 return -EOPNOTSUPP;
64
65 p += 8;
66 }
67
68 p += strspn(p, WHITESPACE);
69 p[strcspn(p, WHITESPACE)] = 0;
70
71 return safe_atoi(p, mnt_id);
72}
73
74
75int fd_is_mount_point(int fd, const char *filename, int flags) {
76 union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT;
77 int mount_id = -1, mount_id_parent = -1;
78 bool nosupp = false, check_st_dev = true;
79 struct stat a, b;
80 int r;
81
82 assert(fd >= 0);
83 assert(filename);
84
85 /* First we will try the name_to_handle_at() syscall, which
86 * tells us the mount id and an opaque file "handle". It is
87 * not supported everywhere though (kernel compile-time
88 * option, not all file systems are hooked up). If it works
89 * the mount id is usually good enough to tell us whether
90 * something is a mount point.
91 *
92 * If that didn't work we will try to read the mount id from
93 * /proc/self/fdinfo/<fd>. This is almost as good as
94 * name_to_handle_at(), however, does not return the
95 * opaque file handle. The opaque file handle is pretty useful
96 * to detect the root directory, which we should always
97 * consider a mount point. Hence we use this only as
98 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
99 * kernel addition.
100 *
101 * As last fallback we do traditional fstat() based st_dev
102 * comparisons. This is how things were traditionally done,
103 * but unionfs breaks breaks this since it exposes file
104 * systems with a variety of st_dev reported. Also, btrfs
105 * subvolumes have different st_dev, even though they aren't
106 * real mounts of their own. */
107
108 r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags);
109 if (r < 0) {
110 if (errno == ENOSYS)
111 /* This kernel does not support name_to_handle_at()
112 * fall back to simpler logic. */
113 goto fallback_fdinfo;
114 else if (errno == EOPNOTSUPP)
115 /* This kernel or file system does not support
116 * name_to_handle_at(), hence let's see if the
117 * upper fs supports it (in which case it is a
118 * mount point), otherwise fallback to the
119 * traditional stat() logic */
120 nosupp = true;
121 else
122 return -errno;
123 }
124
125 r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH);
126 if (r < 0) {
127 if (errno == EOPNOTSUPP) {
128 if (nosupp)
129 /* Neither parent nor child do name_to_handle_at()?
130 We have no choice but to fall back. */
131 goto fallback_fdinfo;
132 else
133 /* The parent can't do name_to_handle_at() but the
134 * directory we are interested in can?
135 * If so, it must be a mount point. */
136 return 1;
137 } else
138 return -errno;
139 }
140
141 /* The parent can do name_to_handle_at() but the
142 * directory we are interested in can't? If so, it
143 * must be a mount point. */
144 if (nosupp)
145 return 1;
146
147 /* If the file handle for the directory we are
148 * interested in and its parent are identical, we
149 * assume this is the root directory, which is a mount
150 * point. */
151
152 if (h.handle.handle_bytes == h_parent.handle.handle_bytes &&
153 h.handle.handle_type == h_parent.handle.handle_type &&
154 memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0)
155 return 1;
156
157 return mount_id != mount_id_parent;
158
159fallback_fdinfo:
160 r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
161 if (r == -EOPNOTSUPP)
162 goto fallback_fstat;
163 if (r < 0)
164 return r;
165
166 r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
167 if (r < 0)
168 return r;
169
170 if (mount_id != mount_id_parent)
171 return 1;
172
173 /* Hmm, so, the mount ids are the same. This leaves one
174 * special case though for the root file system. For that,
175 * let's see if the parent directory has the same inode as we
176 * are interested in. Hence, let's also do fstat() checks now,
177 * too, but avoid the st_dev comparisons, since they aren't
178 * that useful on unionfs mounts. */
179 check_st_dev = false;
180
181fallback_fstat:
182 /* yay for fstatat() taking a different set of flags than the other
183 * _at() above */
184 if (flags & AT_SYMLINK_FOLLOW)
185 flags &= ~AT_SYMLINK_FOLLOW;
186 else
187 flags |= AT_SYMLINK_NOFOLLOW;
188 if (fstatat(fd, filename, &a, flags) < 0)
189 return -errno;
190
191 if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
192 return -errno;
193
194 /* A directory with same device and inode as its parent? Must
195 * be the root directory */
196 if (a.st_dev == b.st_dev &&
197 a.st_ino == b.st_ino)
198 return 1;
199
200 return check_st_dev && (a.st_dev != b.st_dev);
201}
202
203/* flags can be AT_SYMLINK_FOLLOW or 0 */
204int path_is_mount_point(const char *t, int flags) {
205 _cleanup_close_ int fd = -1;
206 _cleanup_free_ char *canonical = NULL, *parent = NULL;
207
208 assert(t);
209
210 if (path_equal(t, "/"))
211 return 1;
212
213 /* we need to resolve symlinks manually, we can't just rely on
214 * fd_is_mount_point() to do that for us; if we have a structure like
215 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
216 * look at needs to be /usr, not /. */
217 if (flags & AT_SYMLINK_FOLLOW) {
218 canonical = canonicalize_file_name(t);
219 if (!canonical)
220 return -errno;
221
222 t = canonical;
223 }
224
225 parent = dirname_malloc(t);
226 if (!parent)
227 return -ENOMEM;
228
229 fd = openat(AT_FDCWD, parent, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_PATH);
230 if (fd < 0)
231 return -errno;
232
233 return fd_is_mount_point(fd, basename(t), flags);
234}
235
236int umount_recursive(const char *prefix, int flags) {
237 bool again;
238 int n = 0, r;
239
240 /* Try to umount everything recursively below a
241 * directory. Also, take care of stacked mounts, and keep
242 * unmounting them until they are gone. */
243
244 do {
245 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
246
247 again = false;
248 r = 0;
249
250 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
251 if (!proc_self_mountinfo)
252 return -errno;
253
254 for (;;) {
255 _cleanup_free_ char *path = NULL, *p = NULL;
256 int k;
257
258 k = fscanf(proc_self_mountinfo,
259 "%*s " /* (1) mount id */
260 "%*s " /* (2) parent id */
261 "%*s " /* (3) major:minor */
262 "%*s " /* (4) root */
263 "%ms " /* (5) mount point */
264 "%*s" /* (6) mount options */
265 "%*[^-]" /* (7) optional fields */
266 "- " /* (8) separator */
267 "%*s " /* (9) file system type */
268 "%*s" /* (10) mount source */
269 "%*s" /* (11) mount options 2 */
270 "%*[^\n]", /* some rubbish at the end */
271 &path);
272 if (k != 1) {
273 if (k == EOF)
274 break;
275
276 continue;
277 }
278
279 r = cunescape(path, UNESCAPE_RELAX, &p);
280 if (r < 0)
281 return r;
282
283 if (!path_startswith(p, prefix))
284 continue;
285
286 if (umount2(p, flags) < 0) {
287 r = -errno;
288 continue;
289 }
290
291 again = true;
292 n++;
293
294 break;
295 }
296
297 } while (again);
298
299 return r ? r : n;
300}
301
/* Queries the mount flags of the file system containing "path" via
 * statvfs() and stores them in *flags. Returns 0 on success, or a negative
 * errno value on failure, in which case *flags is left untouched. */
static int get_mount_flags(const char *path, unsigned long *flags) {
        struct statvfs vfs;
        int rc;

        rc = statvfs(path, &vfs);
        if (rc < 0)
                return -errno;

        *flags = vfs.f_flag;

        return 0;
}
310
311int bind_remount_recursive(const char *prefix, bool ro) {
312 _cleanup_set_free_free_ Set *done = NULL;
313 _cleanup_free_ char *cleaned = NULL;
314 int r;
315
316 /* Recursively remount a directory (and all its submounts)
317 * read-only or read-write. If the directory is already
318 * mounted, we reuse the mount and simply mark it
319 * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
320 * operation). If it isn't we first make it one. Afterwards we
321 * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
322 * submounts we can access, too. When mounts are stacked on
323 * the same mount point we only care for each individual
324 * "top-level" mount on each point, as we cannot
325 * influence/access the underlying mounts anyway. We do not
326 * have any effect on future submounts that might get
327 * propagated, they migt be writable. This includes future
328 * submounts that have been triggered via autofs. */
329
330 cleaned = strdup(prefix);
331 if (!cleaned)
332 return -ENOMEM;
333
334 path_kill_slashes(cleaned);
335
336 done = set_new(&string_hash_ops);
337 if (!done)
338 return -ENOMEM;
339
340 for (;;) {
341 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
342 _cleanup_set_free_free_ Set *todo = NULL;
343 bool top_autofs = false;
344 char *x;
345 unsigned long orig_flags;
346
347 todo = set_new(&string_hash_ops);
348 if (!todo)
349 return -ENOMEM;
350
351 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
352 if (!proc_self_mountinfo)
353 return -errno;
354
355 for (;;) {
356 _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
357 int k;
358
359 k = fscanf(proc_self_mountinfo,
360 "%*s " /* (1) mount id */
361 "%*s " /* (2) parent id */
362 "%*s " /* (3) major:minor */
363 "%*s " /* (4) root */
364 "%ms " /* (5) mount point */
365 "%*s" /* (6) mount options (superblock) */
366 "%*[^-]" /* (7) optional fields */
367 "- " /* (8) separator */
368 "%ms " /* (9) file system type */
369 "%*s" /* (10) mount source */
370 "%*s" /* (11) mount options (bind mount) */
371 "%*[^\n]", /* some rubbish at the end */
372 &path,
373 &type);
374 if (k != 2) {
375 if (k == EOF)
376 break;
377
378 continue;
379 }
380
381 r = cunescape(path, UNESCAPE_RELAX, &p);
382 if (r < 0)
383 return r;
384
385 /* Let's ignore autofs mounts. If they aren't
386 * triggered yet, we want to avoid triggering
387 * them, as we don't make any guarantees for
388 * future submounts anyway. If they are
389 * already triggered, then we will find
390 * another entry for this. */
391 if (streq(type, "autofs")) {
392 top_autofs = top_autofs || path_equal(cleaned, p);
393 continue;
394 }
395
396 if (path_startswith(p, cleaned) &&
397 !set_contains(done, p)) {
398
399 r = set_consume(todo, p);
400 p = NULL;
401
402 if (r == -EEXIST)
403 continue;
404 if (r < 0)
405 return r;
406 }
407 }
408
409 /* If we have no submounts to process anymore and if
410 * the root is either already done, or an autofs, we
411 * are done */
412 if (set_isempty(todo) &&
413 (top_autofs || set_contains(done, cleaned)))
414 return 0;
415
416 if (!set_contains(done, cleaned) &&
417 !set_contains(todo, cleaned)) {
418 /* The prefix directory itself is not yet a
419 * mount, make it one. */
420 if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
421 return -errno;
422
423 orig_flags = 0;
424 (void) get_mount_flags(cleaned, &orig_flags);
425 orig_flags &= ~MS_RDONLY;
426
427 if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
428 return -errno;
429
430 x = strdup(cleaned);
431 if (!x)
432 return -ENOMEM;
433
434 r = set_consume(done, x);
435 if (r < 0)
436 return r;
437 }
438
439 while ((x = set_steal_first(todo))) {
440
441 r = set_consume(done, x);
442 if (r == -EEXIST || r == 0)
443 continue;
444 if (r < 0)
445 return r;
446
447 /* Try to reuse the original flag set, but
448 * don't care for errors, in case of
449 * obstructed mounts */
450 orig_flags = 0;
451 (void) get_mount_flags(x, &orig_flags);
452 orig_flags &= ~MS_RDONLY;
453
454 if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {
455
456 /* Deal with mount points that are
457 * obstructed by a later mount */
458
459 if (errno != ENOENT)
460 return -errno;
461 }
462
463 }
464 }
465}
466
/* Makes the file system mounted at "path" the new root of the calling
 * process: change into it, move the mount onto "/", chroot() into the
 * current directory and finally chdir() to the new "/".
 *
 * Returns 0 on success, or the negative errno of the first step that
 * failed; later steps are not attempted after a failure. */
int mount_move_root(const char *path) {
        assert(path);

        /* The steps are strictly ordered; || short-circuits on the first
         * failing call, so errno still belongs to that call. */
        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}