]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/mount-util.c
24e88babcdc2216c63ad3d3f3dea26b064735b81
[thirdparty/systemd.git] / src / basic / mount-util.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <string.h>
23 #include <sys/mount.h>
24 #include <sys/statvfs.h>
25
26 #include "escape.h"
27 #include "fd-util.h"
28 #include "fileio.h"
29 #include "mount-util.h"
30 #include "parse-util.h"
31 #include "path-util.h"
32 #include "set.h"
33 #include "stdio-util.h"
34 #include "string-util.h"
35 #include "util.h"
36
37 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
38 char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
39 _cleanup_free_ char *fdinfo = NULL;
40 _cleanup_close_ int subfd = -1;
41 char *p;
42 int r;
43
44 if ((flags & AT_EMPTY_PATH) && isempty(filename))
45 xsprintf(path, "/proc/self/fdinfo/%i", fd);
46 else {
47 subfd = openat(fd, filename, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_PATH);
48 if (subfd < 0)
49 return -errno;
50
51 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
52 }
53
54 r = read_full_file(path, &fdinfo, NULL);
55 if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
56 return -EOPNOTSUPP;
57 if (r < 0)
58 return -errno;
59
60 p = startswith(fdinfo, "mnt_id:");
61 if (!p) {
62 p = strstr(fdinfo, "\nmnt_id:");
63 if (!p) /* The mnt_id field is a relatively new addition */
64 return -EOPNOTSUPP;
65
66 p += 8;
67 }
68
69 p += strspn(p, WHITESPACE);
70 p[strcspn(p, WHITESPACE)] = 0;
71
72 return safe_atoi(p, mnt_id);
73 }
74
75
76 int fd_is_mount_point(int fd, const char *filename, int flags) {
77 union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT;
78 int mount_id = -1, mount_id_parent = -1;
79 bool nosupp = false, check_st_dev = true;
80 struct stat a, b;
81 int r;
82
83 assert(fd >= 0);
84 assert(filename);
85
86 /* First we will try the name_to_handle_at() syscall, which
87 * tells us the mount id and an opaque file "handle". It is
88 * not supported everywhere though (kernel compile-time
89 * option, not all file systems are hooked up). If it works
90 * the mount id is usually good enough to tell us whether
91 * something is a mount point.
92 *
93 * If that didn't work we will try to read the mount id from
94 * /proc/self/fdinfo/<fd>. This is almost as good as
95 * name_to_handle_at(), however, does not return the
96 * opaque file handle. The opaque file handle is pretty useful
97 * to detect the root directory, which we should always
98 * consider a mount point. Hence we use this only as
99 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
100 * kernel addition.
101 *
102 * As last fallback we do traditional fstat() based st_dev
103 * comparisons. This is how things were traditionally done,
104 * but unionfs breaks breaks this since it exposes file
105 * systems with a variety of st_dev reported. Also, btrfs
106 * subvolumes have different st_dev, even though they aren't
107 * real mounts of their own. */
108
109 r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags);
110 if (r < 0) {
111 if (errno == ENOSYS)
112 /* This kernel does not support name_to_handle_at()
113 * fall back to simpler logic. */
114 goto fallback_fdinfo;
115 else if (errno == EOPNOTSUPP)
116 /* This kernel or file system does not support
117 * name_to_handle_at(), hence let's see if the
118 * upper fs supports it (in which case it is a
119 * mount point), otherwise fallback to the
120 * traditional stat() logic */
121 nosupp = true;
122 else
123 return -errno;
124 }
125
126 r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH);
127 if (r < 0) {
128 if (errno == EOPNOTSUPP) {
129 if (nosupp)
130 /* Neither parent nor child do name_to_handle_at()?
131 We have no choice but to fall back. */
132 goto fallback_fdinfo;
133 else
134 /* The parent can't do name_to_handle_at() but the
135 * directory we are interested in can?
136 * If so, it must be a mount point. */
137 return 1;
138 } else
139 return -errno;
140 }
141
142 /* The parent can do name_to_handle_at() but the
143 * directory we are interested in can't? If so, it
144 * must be a mount point. */
145 if (nosupp)
146 return 1;
147
148 /* If the file handle for the directory we are
149 * interested in and its parent are identical, we
150 * assume this is the root directory, which is a mount
151 * point. */
152
153 if (h.handle.handle_bytes == h_parent.handle.handle_bytes &&
154 h.handle.handle_type == h_parent.handle.handle_type &&
155 memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0)
156 return 1;
157
158 return mount_id != mount_id_parent;
159
160 fallback_fdinfo:
161 r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
162 if (r == -EOPNOTSUPP)
163 goto fallback_fstat;
164 if (r < 0)
165 return r;
166
167 r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
168 if (r < 0)
169 return r;
170
171 if (mount_id != mount_id_parent)
172 return 1;
173
174 /* Hmm, so, the mount ids are the same. This leaves one
175 * special case though for the root file system. For that,
176 * let's see if the parent directory has the same inode as we
177 * are interested in. Hence, let's also do fstat() checks now,
178 * too, but avoid the st_dev comparisons, since they aren't
179 * that useful on unionfs mounts. */
180 check_st_dev = false;
181
182 fallback_fstat:
183 /* yay for fstatat() taking a different set of flags than the other
184 * _at() above */
185 if (flags & AT_SYMLINK_FOLLOW)
186 flags &= ~AT_SYMLINK_FOLLOW;
187 else
188 flags |= AT_SYMLINK_NOFOLLOW;
189 if (fstatat(fd, filename, &a, flags) < 0)
190 return -errno;
191
192 if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
193 return -errno;
194
195 /* A directory with same device and inode as its parent? Must
196 * be the root directory */
197 if (a.st_dev == b.st_dev &&
198 a.st_ino == b.st_ino)
199 return 1;
200
201 return check_st_dev && (a.st_dev != b.st_dev);
202 }
203
204 /* flags can be AT_SYMLINK_FOLLOW or 0 */
205 int path_is_mount_point(const char *t, int flags) {
206 _cleanup_close_ int fd = -1;
207 _cleanup_free_ char *canonical = NULL, *parent = NULL;
208
209 assert(t);
210
211 if (path_equal(t, "/"))
212 return 1;
213
214 /* we need to resolve symlinks manually, we can't just rely on
215 * fd_is_mount_point() to do that for us; if we have a structure like
216 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
217 * look at needs to be /usr, not /. */
218 if (flags & AT_SYMLINK_FOLLOW) {
219 canonical = canonicalize_file_name(t);
220 if (!canonical)
221 return -errno;
222
223 t = canonical;
224 }
225
226 parent = dirname_malloc(t);
227 if (!parent)
228 return -ENOMEM;
229
230 fd = openat(AT_FDCWD, parent, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_PATH);
231 if (fd < 0)
232 return -errno;
233
234 return fd_is_mount_point(fd, basename(t), flags);
235 }
236
237 int umount_recursive(const char *prefix, int flags) {
238 bool again;
239 int n = 0, r;
240
241 /* Try to umount everything recursively below a
242 * directory. Also, take care of stacked mounts, and keep
243 * unmounting them until they are gone. */
244
245 do {
246 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
247
248 again = false;
249 r = 0;
250
251 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
252 if (!proc_self_mountinfo)
253 return -errno;
254
255 for (;;) {
256 _cleanup_free_ char *path = NULL, *p = NULL;
257 int k;
258
259 k = fscanf(proc_self_mountinfo,
260 "%*s " /* (1) mount id */
261 "%*s " /* (2) parent id */
262 "%*s " /* (3) major:minor */
263 "%*s " /* (4) root */
264 "%ms " /* (5) mount point */
265 "%*s" /* (6) mount options */
266 "%*[^-]" /* (7) optional fields */
267 "- " /* (8) separator */
268 "%*s " /* (9) file system type */
269 "%*s" /* (10) mount source */
270 "%*s" /* (11) mount options 2 */
271 "%*[^\n]", /* some rubbish at the end */
272 &path);
273 if (k != 1) {
274 if (k == EOF)
275 break;
276
277 continue;
278 }
279
280 r = cunescape(path, UNESCAPE_RELAX, &p);
281 if (r < 0)
282 return r;
283
284 if (!path_startswith(p, prefix))
285 continue;
286
287 if (umount2(p, flags) < 0) {
288 r = -errno;
289 continue;
290 }
291
292 again = true;
293 n++;
294
295 break;
296 }
297
298 } while (again);
299
300 return r ? r : n;
301 }
302
/* Queries statvfs() for path and stores the reported f_flag value in *flags.
 * Returns 0 on success, or the negative errno if statvfs() fails, in which
 * case *flags is left untouched. */
static int get_mount_flags(const char *path, unsigned long *flags) {
        struct statvfs sfs;
        int r;

        r = statvfs(path, &sfs);
        if (r < 0)
                return -errno;

        *flags = sfs.f_flag;

        return 0;
}
311
312 int bind_remount_recursive(const char *prefix, bool ro) {
313 _cleanup_set_free_free_ Set *done = NULL;
314 _cleanup_free_ char *cleaned = NULL;
315 int r;
316
317 /* Recursively remount a directory (and all its submounts)
318 * read-only or read-write. If the directory is already
319 * mounted, we reuse the mount and simply mark it
320 * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
321 * operation). If it isn't we first make it one. Afterwards we
322 * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
323 * submounts we can access, too. When mounts are stacked on
324 * the same mount point we only care for each individual
325 * "top-level" mount on each point, as we cannot
326 * influence/access the underlying mounts anyway. We do not
327 * have any effect on future submounts that might get
328 * propagated, they migt be writable. This includes future
329 * submounts that have been triggered via autofs. */
330
331 cleaned = strdup(prefix);
332 if (!cleaned)
333 return -ENOMEM;
334
335 path_kill_slashes(cleaned);
336
337 done = set_new(&string_hash_ops);
338 if (!done)
339 return -ENOMEM;
340
341 for (;;) {
342 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
343 _cleanup_set_free_free_ Set *todo = NULL;
344 bool top_autofs = false;
345 char *x;
346 unsigned long orig_flags;
347
348 todo = set_new(&string_hash_ops);
349 if (!todo)
350 return -ENOMEM;
351
352 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
353 if (!proc_self_mountinfo)
354 return -errno;
355
356 for (;;) {
357 _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
358 int k;
359
360 k = fscanf(proc_self_mountinfo,
361 "%*s " /* (1) mount id */
362 "%*s " /* (2) parent id */
363 "%*s " /* (3) major:minor */
364 "%*s " /* (4) root */
365 "%ms " /* (5) mount point */
366 "%*s" /* (6) mount options (superblock) */
367 "%*[^-]" /* (7) optional fields */
368 "- " /* (8) separator */
369 "%ms " /* (9) file system type */
370 "%*s" /* (10) mount source */
371 "%*s" /* (11) mount options (bind mount) */
372 "%*[^\n]", /* some rubbish at the end */
373 &path,
374 &type);
375 if (k != 2) {
376 if (k == EOF)
377 break;
378
379 continue;
380 }
381
382 r = cunescape(path, UNESCAPE_RELAX, &p);
383 if (r < 0)
384 return r;
385
386 /* Let's ignore autofs mounts. If they aren't
387 * triggered yet, we want to avoid triggering
388 * them, as we don't make any guarantees for
389 * future submounts anyway. If they are
390 * already triggered, then we will find
391 * another entry for this. */
392 if (streq(type, "autofs")) {
393 top_autofs = top_autofs || path_equal(cleaned, p);
394 continue;
395 }
396
397 if (path_startswith(p, cleaned) &&
398 !set_contains(done, p)) {
399
400 r = set_consume(todo, p);
401 p = NULL;
402
403 if (r == -EEXIST)
404 continue;
405 if (r < 0)
406 return r;
407 }
408 }
409
410 /* If we have no submounts to process anymore and if
411 * the root is either already done, or an autofs, we
412 * are done */
413 if (set_isempty(todo) &&
414 (top_autofs || set_contains(done, cleaned)))
415 return 0;
416
417 if (!set_contains(done, cleaned) &&
418 !set_contains(todo, cleaned)) {
419 /* The prefix directory itself is not yet a
420 * mount, make it one. */
421 if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
422 return -errno;
423
424 orig_flags = 0;
425 (void) get_mount_flags(cleaned, &orig_flags);
426 orig_flags &= ~MS_RDONLY;
427
428 if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
429 return -errno;
430
431 x = strdup(cleaned);
432 if (!x)
433 return -ENOMEM;
434
435 r = set_consume(done, x);
436 if (r < 0)
437 return r;
438 }
439
440 while ((x = set_steal_first(todo))) {
441
442 r = set_consume(done, x);
443 if (r == -EEXIST || r == 0)
444 continue;
445 if (r < 0)
446 return r;
447
448 /* Try to reuse the original flag set, but
449 * don't care for errors, in case of
450 * obstructed mounts */
451 orig_flags = 0;
452 (void) get_mount_flags(x, &orig_flags);
453 orig_flags &= ~MS_RDONLY;
454
455 if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {
456
457 /* Deal with mount points that are
458 * obstructed by a later mount */
459
460 if (errno != ENOENT)
461 return -errno;
462 }
463
464 }
465 }
466 }
467
/* Makes path the new root directory: changes into path, moves the mount at
 * path onto "/" with MS_MOVE, chroots into the current directory and finally
 * changes to "/". Returns 0 on success, or the negative errno of the first
 * step that fails; later steps are not attempted. */
int mount_move_root(const char *path) {
        assert(path);

        /* Short-circuit evaluation stops at the first failing call, so errno
         * still belongs to that call when it is read. */
        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}