]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/mount-util.c
update TODO
[thirdparty/systemd.git] / src / basic / mount-util.c
CommitLineData
4349cd7c
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <string.h>
23#include <sys/mount.h>
24#include <sys/statvfs.h>
25
b5efdb8a 26#include "alloc-util.h"
4349cd7c
LP
27#include "escape.h"
28#include "fd-util.h"
29#include "fileio.h"
30#include "mount-util.h"
31#include "parse-util.h"
32#include "path-util.h"
33#include "set.h"
15a5e950 34#include "stdio-util.h"
4349cd7c
LP
35#include "string-util.h"
36#include "util.h"
37
38static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
39 char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
40 _cleanup_free_ char *fdinfo = NULL;
41 _cleanup_close_ int subfd = -1;
42 char *p;
43 int r;
44
45 if ((flags & AT_EMPTY_PATH) && isempty(filename))
46 xsprintf(path, "/proc/self/fdinfo/%i", fd);
47 else {
48 subfd = openat(fd, filename, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_PATH);
49 if (subfd < 0)
50 return -errno;
51
52 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
53 }
54
55 r = read_full_file(path, &fdinfo, NULL);
56 if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
57 return -EOPNOTSUPP;
58 if (r < 0)
59 return -errno;
60
61 p = startswith(fdinfo, "mnt_id:");
62 if (!p) {
63 p = strstr(fdinfo, "\nmnt_id:");
64 if (!p) /* The mnt_id field is a relatively new addition */
65 return -EOPNOTSUPP;
66
67 p += 8;
68 }
69
70 p += strspn(p, WHITESPACE);
71 p[strcspn(p, WHITESPACE)] = 0;
72
73 return safe_atoi(p, mnt_id);
74}
75
76
77int fd_is_mount_point(int fd, const char *filename, int flags) {
78 union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT;
79 int mount_id = -1, mount_id_parent = -1;
80 bool nosupp = false, check_st_dev = true;
81 struct stat a, b;
82 int r;
83
84 assert(fd >= 0);
85 assert(filename);
86
87 /* First we will try the name_to_handle_at() syscall, which
88 * tells us the mount id and an opaque file "handle". It is
89 * not supported everywhere though (kernel compile-time
90 * option, not all file systems are hooked up). If it works
91 * the mount id is usually good enough to tell us whether
92 * something is a mount point.
93 *
94 * If that didn't work we will try to read the mount id from
95 * /proc/self/fdinfo/<fd>. This is almost as good as
96 * name_to_handle_at(), however, does not return the
97 * opaque file handle. The opaque file handle is pretty useful
98 * to detect the root directory, which we should always
99 * consider a mount point. Hence we use this only as
100 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
101 * kernel addition.
102 *
103 * As last fallback we do traditional fstat() based st_dev
104 * comparisons. This is how things were traditionally done,
105 * but unionfs breaks breaks this since it exposes file
106 * systems with a variety of st_dev reported. Also, btrfs
107 * subvolumes have different st_dev, even though they aren't
108 * real mounts of their own. */
109
110 r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags);
111 if (r < 0) {
112 if (errno == ENOSYS)
113 /* This kernel does not support name_to_handle_at()
114 * fall back to simpler logic. */
115 goto fallback_fdinfo;
116 else if (errno == EOPNOTSUPP)
117 /* This kernel or file system does not support
118 * name_to_handle_at(), hence let's see if the
119 * upper fs supports it (in which case it is a
120 * mount point), otherwise fallback to the
121 * traditional stat() logic */
122 nosupp = true;
123 else
124 return -errno;
125 }
126
127 r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH);
128 if (r < 0) {
129 if (errno == EOPNOTSUPP) {
130 if (nosupp)
131 /* Neither parent nor child do name_to_handle_at()?
132 We have no choice but to fall back. */
133 goto fallback_fdinfo;
134 else
135 /* The parent can't do name_to_handle_at() but the
136 * directory we are interested in can?
137 * If so, it must be a mount point. */
138 return 1;
139 } else
140 return -errno;
141 }
142
143 /* The parent can do name_to_handle_at() but the
144 * directory we are interested in can't? If so, it
145 * must be a mount point. */
146 if (nosupp)
147 return 1;
148
149 /* If the file handle for the directory we are
150 * interested in and its parent are identical, we
151 * assume this is the root directory, which is a mount
152 * point. */
153
154 if (h.handle.handle_bytes == h_parent.handle.handle_bytes &&
155 h.handle.handle_type == h_parent.handle.handle_type &&
156 memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0)
157 return 1;
158
159 return mount_id != mount_id_parent;
160
161fallback_fdinfo:
162 r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
163 if (r == -EOPNOTSUPP)
164 goto fallback_fstat;
165 if (r < 0)
166 return r;
167
168 r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
169 if (r < 0)
170 return r;
171
172 if (mount_id != mount_id_parent)
173 return 1;
174
175 /* Hmm, so, the mount ids are the same. This leaves one
176 * special case though for the root file system. For that,
177 * let's see if the parent directory has the same inode as we
178 * are interested in. Hence, let's also do fstat() checks now,
179 * too, but avoid the st_dev comparisons, since they aren't
180 * that useful on unionfs mounts. */
181 check_st_dev = false;
182
183fallback_fstat:
184 /* yay for fstatat() taking a different set of flags than the other
185 * _at() above */
186 if (flags & AT_SYMLINK_FOLLOW)
187 flags &= ~AT_SYMLINK_FOLLOW;
188 else
189 flags |= AT_SYMLINK_NOFOLLOW;
190 if (fstatat(fd, filename, &a, flags) < 0)
191 return -errno;
192
193 if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
194 return -errno;
195
196 /* A directory with same device and inode as its parent? Must
197 * be the root directory */
198 if (a.st_dev == b.st_dev &&
199 a.st_ino == b.st_ino)
200 return 1;
201
202 return check_st_dev && (a.st_dev != b.st_dev);
203}
204
205/* flags can be AT_SYMLINK_FOLLOW or 0 */
206int path_is_mount_point(const char *t, int flags) {
207 _cleanup_close_ int fd = -1;
208 _cleanup_free_ char *canonical = NULL, *parent = NULL;
209
210 assert(t);
211
212 if (path_equal(t, "/"))
213 return 1;
214
215 /* we need to resolve symlinks manually, we can't just rely on
216 * fd_is_mount_point() to do that for us; if we have a structure like
217 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
218 * look at needs to be /usr, not /. */
219 if (flags & AT_SYMLINK_FOLLOW) {
220 canonical = canonicalize_file_name(t);
221 if (!canonical)
222 return -errno;
223
224 t = canonical;
225 }
226
227 parent = dirname_malloc(t);
228 if (!parent)
229 return -ENOMEM;
230
231 fd = openat(AT_FDCWD, parent, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_PATH);
232 if (fd < 0)
233 return -errno;
234
235 return fd_is_mount_point(fd, basename(t), flags);
236}
237
238int umount_recursive(const char *prefix, int flags) {
239 bool again;
240 int n = 0, r;
241
242 /* Try to umount everything recursively below a
243 * directory. Also, take care of stacked mounts, and keep
244 * unmounting them until they are gone. */
245
246 do {
247 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
248
249 again = false;
250 r = 0;
251
252 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
253 if (!proc_self_mountinfo)
254 return -errno;
255
256 for (;;) {
257 _cleanup_free_ char *path = NULL, *p = NULL;
258 int k;
259
260 k = fscanf(proc_self_mountinfo,
261 "%*s " /* (1) mount id */
262 "%*s " /* (2) parent id */
263 "%*s " /* (3) major:minor */
264 "%*s " /* (4) root */
265 "%ms " /* (5) mount point */
266 "%*s" /* (6) mount options */
267 "%*[^-]" /* (7) optional fields */
268 "- " /* (8) separator */
269 "%*s " /* (9) file system type */
270 "%*s" /* (10) mount source */
271 "%*s" /* (11) mount options 2 */
272 "%*[^\n]", /* some rubbish at the end */
273 &path);
274 if (k != 1) {
275 if (k == EOF)
276 break;
277
278 continue;
279 }
280
281 r = cunescape(path, UNESCAPE_RELAX, &p);
282 if (r < 0)
283 return r;
284
285 if (!path_startswith(p, prefix))
286 continue;
287
288 if (umount2(p, flags) < 0) {
289 r = -errno;
290 continue;
291 }
292
293 again = true;
294 n++;
295
296 break;
297 }
298
299 } while (again);
300
301 return r ? r : n;
302}
303
/* Queries the mount flags of the file system containing path via statvfs().
 * On success stores the f_flag field in *flags and returns 0; on failure
 * returns the negated errno and leaves *flags untouched. */
static int get_mount_flags(const char *path, unsigned long *flags) {
        struct statvfs vfs_info;
        int rc;

        rc = statvfs(path, &vfs_info);
        if (rc < 0)
                return -errno;

        *flags = vfs_info.f_flag;

        return 0;
}
312
313int bind_remount_recursive(const char *prefix, bool ro) {
314 _cleanup_set_free_free_ Set *done = NULL;
315 _cleanup_free_ char *cleaned = NULL;
316 int r;
317
318 /* Recursively remount a directory (and all its submounts)
319 * read-only or read-write. If the directory is already
320 * mounted, we reuse the mount and simply mark it
321 * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
322 * operation). If it isn't we first make it one. Afterwards we
323 * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
324 * submounts we can access, too. When mounts are stacked on
325 * the same mount point we only care for each individual
326 * "top-level" mount on each point, as we cannot
327 * influence/access the underlying mounts anyway. We do not
328 * have any effect on future submounts that might get
329 * propagated, they migt be writable. This includes future
330 * submounts that have been triggered via autofs. */
331
332 cleaned = strdup(prefix);
333 if (!cleaned)
334 return -ENOMEM;
335
336 path_kill_slashes(cleaned);
337
338 done = set_new(&string_hash_ops);
339 if (!done)
340 return -ENOMEM;
341
342 for (;;) {
343 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
344 _cleanup_set_free_free_ Set *todo = NULL;
345 bool top_autofs = false;
346 char *x;
347 unsigned long orig_flags;
348
349 todo = set_new(&string_hash_ops);
350 if (!todo)
351 return -ENOMEM;
352
353 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
354 if (!proc_self_mountinfo)
355 return -errno;
356
357 for (;;) {
358 _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
359 int k;
360
361 k = fscanf(proc_self_mountinfo,
362 "%*s " /* (1) mount id */
363 "%*s " /* (2) parent id */
364 "%*s " /* (3) major:minor */
365 "%*s " /* (4) root */
366 "%ms " /* (5) mount point */
367 "%*s" /* (6) mount options (superblock) */
368 "%*[^-]" /* (7) optional fields */
369 "- " /* (8) separator */
370 "%ms " /* (9) file system type */
371 "%*s" /* (10) mount source */
372 "%*s" /* (11) mount options (bind mount) */
373 "%*[^\n]", /* some rubbish at the end */
374 &path,
375 &type);
376 if (k != 2) {
377 if (k == EOF)
378 break;
379
380 continue;
381 }
382
383 r = cunescape(path, UNESCAPE_RELAX, &p);
384 if (r < 0)
385 return r;
386
387 /* Let's ignore autofs mounts. If they aren't
388 * triggered yet, we want to avoid triggering
389 * them, as we don't make any guarantees for
390 * future submounts anyway. If they are
391 * already triggered, then we will find
392 * another entry for this. */
393 if (streq(type, "autofs")) {
394 top_autofs = top_autofs || path_equal(cleaned, p);
395 continue;
396 }
397
398 if (path_startswith(p, cleaned) &&
399 !set_contains(done, p)) {
400
401 r = set_consume(todo, p);
402 p = NULL;
403
404 if (r == -EEXIST)
405 continue;
406 if (r < 0)
407 return r;
408 }
409 }
410
411 /* If we have no submounts to process anymore and if
412 * the root is either already done, or an autofs, we
413 * are done */
414 if (set_isempty(todo) &&
415 (top_autofs || set_contains(done, cleaned)))
416 return 0;
417
418 if (!set_contains(done, cleaned) &&
419 !set_contains(todo, cleaned)) {
420 /* The prefix directory itself is not yet a
421 * mount, make it one. */
422 if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
423 return -errno;
424
425 orig_flags = 0;
426 (void) get_mount_flags(cleaned, &orig_flags);
427 orig_flags &= ~MS_RDONLY;
428
429 if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
430 return -errno;
431
432 x = strdup(cleaned);
433 if (!x)
434 return -ENOMEM;
435
436 r = set_consume(done, x);
437 if (r < 0)
438 return r;
439 }
440
441 while ((x = set_steal_first(todo))) {
442
443 r = set_consume(done, x);
444 if (r == -EEXIST || r == 0)
445 continue;
446 if (r < 0)
447 return r;
448
449 /* Try to reuse the original flag set, but
450 * don't care for errors, in case of
451 * obstructed mounts */
452 orig_flags = 0;
453 (void) get_mount_flags(x, &orig_flags);
454 orig_flags &= ~MS_RDONLY;
455
456 if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {
457
458 /* Deal with mount points that are
459 * obstructed by a later mount */
460
461 if (errno != ENOENT)
462 return -errno;
463 }
464
465 }
466 }
467}
468
/* Makes path the new root directory: changes into it, moves the mount at
 * path onto "/" with MS_MOVE, chroot()s into the current directory and
 * finally changes to "/". Returns 0 on success, or the negated errno of the
 * first step that fails (later steps are not attempted). */
int mount_move_root(const char *path) {
        assert(path);

        /* Short-circuit evaluation keeps the steps strictly in order and
         * stops at the first failure, leaving its errno intact. */
        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}