]>
Commit | Line | Data |
---|---|---|
4349cd7c LP |
1 | /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ |
2 | ||
3 | /*** | |
4 | This file is part of systemd. | |
5 | ||
6 | Copyright 2010 Lennart Poettering | |
7 | ||
8 | systemd is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU Lesser General Public License as published by | |
10 | the Free Software Foundation; either version 2.1 of the License, or | |
11 | (at your option) any later version. | |
12 | ||
13 | systemd is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | Lesser General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU Lesser General Public License | |
19 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
20 | ***/ | |
21 | ||
22 | #include <string.h> | |
23 | #include <sys/mount.h> | |
24 | #include <sys/statvfs.h> | |
25 | ||
b5efdb8a | 26 | #include "alloc-util.h" |
4349cd7c LP |
27 | #include "escape.h" |
28 | #include "fd-util.h" | |
29 | #include "fileio.h" | |
30 | #include "mount-util.h" | |
31 | #include "parse-util.h" | |
32 | #include "path-util.h" | |
33 | #include "set.h" | |
15a5e950 | 34 | #include "stdio-util.h" |
4349cd7c LP |
35 | #include "string-util.h" |
36 | #include "util.h" | |
37 | ||
38 | static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) { | |
39 | char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)]; | |
40 | _cleanup_free_ char *fdinfo = NULL; | |
41 | _cleanup_close_ int subfd = -1; | |
42 | char *p; | |
43 | int r; | |
44 | ||
45 | if ((flags & AT_EMPTY_PATH) && isempty(filename)) | |
46 | xsprintf(path, "/proc/self/fdinfo/%i", fd); | |
47 | else { | |
48 | subfd = openat(fd, filename, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_PATH); | |
49 | if (subfd < 0) | |
50 | return -errno; | |
51 | ||
52 | xsprintf(path, "/proc/self/fdinfo/%i", subfd); | |
53 | } | |
54 | ||
55 | r = read_full_file(path, &fdinfo, NULL); | |
56 | if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */ | |
57 | return -EOPNOTSUPP; | |
58 | if (r < 0) | |
59 | return -errno; | |
60 | ||
61 | p = startswith(fdinfo, "mnt_id:"); | |
62 | if (!p) { | |
63 | p = strstr(fdinfo, "\nmnt_id:"); | |
64 | if (!p) /* The mnt_id field is a relatively new addition */ | |
65 | return -EOPNOTSUPP; | |
66 | ||
67 | p += 8; | |
68 | } | |
69 | ||
70 | p += strspn(p, WHITESPACE); | |
71 | p[strcspn(p, WHITESPACE)] = 0; | |
72 | ||
73 | return safe_atoi(p, mnt_id); | |
74 | } | |
75 | ||
76 | ||
77 | int fd_is_mount_point(int fd, const char *filename, int flags) { | |
78 | union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT; | |
79 | int mount_id = -1, mount_id_parent = -1; | |
80 | bool nosupp = false, check_st_dev = true; | |
81 | struct stat a, b; | |
82 | int r; | |
83 | ||
84 | assert(fd >= 0); | |
85 | assert(filename); | |
86 | ||
87 | /* First we will try the name_to_handle_at() syscall, which | |
88 | * tells us the mount id and an opaque file "handle". It is | |
89 | * not supported everywhere though (kernel compile-time | |
90 | * option, not all file systems are hooked up). If it works | |
91 | * the mount id is usually good enough to tell us whether | |
92 | * something is a mount point. | |
93 | * | |
94 | * If that didn't work we will try to read the mount id from | |
95 | * /proc/self/fdinfo/<fd>. This is almost as good as | |
96 | * name_to_handle_at(), however, does not return the | |
97 | * opaque file handle. The opaque file handle is pretty useful | |
98 | * to detect the root directory, which we should always | |
99 | * consider a mount point. Hence we use this only as | |
100 | * fallback. Exporting the mnt_id in fdinfo is a pretty recent | |
101 | * kernel addition. | |
102 | * | |
103 | * As last fallback we do traditional fstat() based st_dev | |
104 | * comparisons. This is how things were traditionally done, | |
105 | * but unionfs breaks breaks this since it exposes file | |
106 | * systems with a variety of st_dev reported. Also, btrfs | |
107 | * subvolumes have different st_dev, even though they aren't | |
108 | * real mounts of their own. */ | |
109 | ||
110 | r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags); | |
111 | if (r < 0) { | |
112 | if (errno == ENOSYS) | |
113 | /* This kernel does not support name_to_handle_at() | |
114 | * fall back to simpler logic. */ | |
115 | goto fallback_fdinfo; | |
116 | else if (errno == EOPNOTSUPP) | |
117 | /* This kernel or file system does not support | |
118 | * name_to_handle_at(), hence let's see if the | |
119 | * upper fs supports it (in which case it is a | |
120 | * mount point), otherwise fallback to the | |
121 | * traditional stat() logic */ | |
122 | nosupp = true; | |
123 | else | |
124 | return -errno; | |
125 | } | |
126 | ||
127 | r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH); | |
128 | if (r < 0) { | |
129 | if (errno == EOPNOTSUPP) { | |
130 | if (nosupp) | |
131 | /* Neither parent nor child do name_to_handle_at()? | |
132 | We have no choice but to fall back. */ | |
133 | goto fallback_fdinfo; | |
134 | else | |
135 | /* The parent can't do name_to_handle_at() but the | |
136 | * directory we are interested in can? | |
137 | * If so, it must be a mount point. */ | |
138 | return 1; | |
139 | } else | |
140 | return -errno; | |
141 | } | |
142 | ||
143 | /* The parent can do name_to_handle_at() but the | |
144 | * directory we are interested in can't? If so, it | |
145 | * must be a mount point. */ | |
146 | if (nosupp) | |
147 | return 1; | |
148 | ||
149 | /* If the file handle for the directory we are | |
150 | * interested in and its parent are identical, we | |
151 | * assume this is the root directory, which is a mount | |
152 | * point. */ | |
153 | ||
154 | if (h.handle.handle_bytes == h_parent.handle.handle_bytes && | |
155 | h.handle.handle_type == h_parent.handle.handle_type && | |
156 | memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0) | |
157 | return 1; | |
158 | ||
159 | return mount_id != mount_id_parent; | |
160 | ||
161 | fallback_fdinfo: | |
162 | r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id); | |
163 | if (r == -EOPNOTSUPP) | |
164 | goto fallback_fstat; | |
165 | if (r < 0) | |
166 | return r; | |
167 | ||
168 | r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent); | |
169 | if (r < 0) | |
170 | return r; | |
171 | ||
172 | if (mount_id != mount_id_parent) | |
173 | return 1; | |
174 | ||
175 | /* Hmm, so, the mount ids are the same. This leaves one | |
176 | * special case though for the root file system. For that, | |
177 | * let's see if the parent directory has the same inode as we | |
178 | * are interested in. Hence, let's also do fstat() checks now, | |
179 | * too, but avoid the st_dev comparisons, since they aren't | |
180 | * that useful on unionfs mounts. */ | |
181 | check_st_dev = false; | |
182 | ||
183 | fallback_fstat: | |
184 | /* yay for fstatat() taking a different set of flags than the other | |
185 | * _at() above */ | |
186 | if (flags & AT_SYMLINK_FOLLOW) | |
187 | flags &= ~AT_SYMLINK_FOLLOW; | |
188 | else | |
189 | flags |= AT_SYMLINK_NOFOLLOW; | |
190 | if (fstatat(fd, filename, &a, flags) < 0) | |
191 | return -errno; | |
192 | ||
193 | if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0) | |
194 | return -errno; | |
195 | ||
196 | /* A directory with same device and inode as its parent? Must | |
197 | * be the root directory */ | |
198 | if (a.st_dev == b.st_dev && | |
199 | a.st_ino == b.st_ino) | |
200 | return 1; | |
201 | ||
202 | return check_st_dev && (a.st_dev != b.st_dev); | |
203 | } | |
204 | ||
205 | /* flags can be AT_SYMLINK_FOLLOW or 0 */ | |
206 | int path_is_mount_point(const char *t, int flags) { | |
207 | _cleanup_close_ int fd = -1; | |
208 | _cleanup_free_ char *canonical = NULL, *parent = NULL; | |
209 | ||
210 | assert(t); | |
211 | ||
212 | if (path_equal(t, "/")) | |
213 | return 1; | |
214 | ||
215 | /* we need to resolve symlinks manually, we can't just rely on | |
216 | * fd_is_mount_point() to do that for us; if we have a structure like | |
217 | * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we | |
218 | * look at needs to be /usr, not /. */ | |
219 | if (flags & AT_SYMLINK_FOLLOW) { | |
220 | canonical = canonicalize_file_name(t); | |
221 | if (!canonical) | |
222 | return -errno; | |
223 | ||
224 | t = canonical; | |
225 | } | |
226 | ||
227 | parent = dirname_malloc(t); | |
228 | if (!parent) | |
229 | return -ENOMEM; | |
230 | ||
231 | fd = openat(AT_FDCWD, parent, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_PATH); | |
232 | if (fd < 0) | |
233 | return -errno; | |
234 | ||
235 | return fd_is_mount_point(fd, basename(t), flags); | |
236 | } | |
237 | ||
238 | int umount_recursive(const char *prefix, int flags) { | |
239 | bool again; | |
240 | int n = 0, r; | |
241 | ||
242 | /* Try to umount everything recursively below a | |
243 | * directory. Also, take care of stacked mounts, and keep | |
244 | * unmounting them until they are gone. */ | |
245 | ||
246 | do { | |
247 | _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; | |
248 | ||
249 | again = false; | |
250 | r = 0; | |
251 | ||
252 | proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); | |
253 | if (!proc_self_mountinfo) | |
254 | return -errno; | |
255 | ||
256 | for (;;) { | |
257 | _cleanup_free_ char *path = NULL, *p = NULL; | |
258 | int k; | |
259 | ||
260 | k = fscanf(proc_self_mountinfo, | |
261 | "%*s " /* (1) mount id */ | |
262 | "%*s " /* (2) parent id */ | |
263 | "%*s " /* (3) major:minor */ | |
264 | "%*s " /* (4) root */ | |
265 | "%ms " /* (5) mount point */ | |
266 | "%*s" /* (6) mount options */ | |
267 | "%*[^-]" /* (7) optional fields */ | |
268 | "- " /* (8) separator */ | |
269 | "%*s " /* (9) file system type */ | |
270 | "%*s" /* (10) mount source */ | |
271 | "%*s" /* (11) mount options 2 */ | |
272 | "%*[^\n]", /* some rubbish at the end */ | |
273 | &path); | |
274 | if (k != 1) { | |
275 | if (k == EOF) | |
276 | break; | |
277 | ||
278 | continue; | |
279 | } | |
280 | ||
281 | r = cunescape(path, UNESCAPE_RELAX, &p); | |
282 | if (r < 0) | |
283 | return r; | |
284 | ||
285 | if (!path_startswith(p, prefix)) | |
286 | continue; | |
287 | ||
288 | if (umount2(p, flags) < 0) { | |
289 | r = -errno; | |
290 | continue; | |
291 | } | |
292 | ||
293 | again = true; | |
294 | n++; | |
295 | ||
296 | break; | |
297 | } | |
298 | ||
299 | } while (again); | |
300 | ||
301 | return r ? r : n; | |
302 | } | |
303 | ||
/* Queries the flags of the file system that path resides on by means of
 * statvfs() and stores them in *flags. Returns 0 on success. On failure
 * -errno is returned and *flags is left untouched. */
static int get_mount_flags(const char *path, unsigned long *flags) {
        struct statvfs vfs;

        if (statvfs(path, &vfs) >= 0) {
                *flags = vfs.f_flag;
                return 0;
        }

        return -errno;
}
312 | ||
313 | int bind_remount_recursive(const char *prefix, bool ro) { | |
314 | _cleanup_set_free_free_ Set *done = NULL; | |
315 | _cleanup_free_ char *cleaned = NULL; | |
316 | int r; | |
317 | ||
318 | /* Recursively remount a directory (and all its submounts) | |
319 | * read-only or read-write. If the directory is already | |
320 | * mounted, we reuse the mount and simply mark it | |
321 | * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write | |
322 | * operation). If it isn't we first make it one. Afterwards we | |
323 | * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all | |
324 | * submounts we can access, too. When mounts are stacked on | |
325 | * the same mount point we only care for each individual | |
326 | * "top-level" mount on each point, as we cannot | |
327 | * influence/access the underlying mounts anyway. We do not | |
328 | * have any effect on future submounts that might get | |
329 | * propagated, they migt be writable. This includes future | |
330 | * submounts that have been triggered via autofs. */ | |
331 | ||
332 | cleaned = strdup(prefix); | |
333 | if (!cleaned) | |
334 | return -ENOMEM; | |
335 | ||
336 | path_kill_slashes(cleaned); | |
337 | ||
338 | done = set_new(&string_hash_ops); | |
339 | if (!done) | |
340 | return -ENOMEM; | |
341 | ||
342 | for (;;) { | |
343 | _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; | |
344 | _cleanup_set_free_free_ Set *todo = NULL; | |
345 | bool top_autofs = false; | |
346 | char *x; | |
347 | unsigned long orig_flags; | |
348 | ||
349 | todo = set_new(&string_hash_ops); | |
350 | if (!todo) | |
351 | return -ENOMEM; | |
352 | ||
353 | proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); | |
354 | if (!proc_self_mountinfo) | |
355 | return -errno; | |
356 | ||
357 | for (;;) { | |
358 | _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL; | |
359 | int k; | |
360 | ||
361 | k = fscanf(proc_self_mountinfo, | |
362 | "%*s " /* (1) mount id */ | |
363 | "%*s " /* (2) parent id */ | |
364 | "%*s " /* (3) major:minor */ | |
365 | "%*s " /* (4) root */ | |
366 | "%ms " /* (5) mount point */ | |
367 | "%*s" /* (6) mount options (superblock) */ | |
368 | "%*[^-]" /* (7) optional fields */ | |
369 | "- " /* (8) separator */ | |
370 | "%ms " /* (9) file system type */ | |
371 | "%*s" /* (10) mount source */ | |
372 | "%*s" /* (11) mount options (bind mount) */ | |
373 | "%*[^\n]", /* some rubbish at the end */ | |
374 | &path, | |
375 | &type); | |
376 | if (k != 2) { | |
377 | if (k == EOF) | |
378 | break; | |
379 | ||
380 | continue; | |
381 | } | |
382 | ||
383 | r = cunescape(path, UNESCAPE_RELAX, &p); | |
384 | if (r < 0) | |
385 | return r; | |
386 | ||
387 | /* Let's ignore autofs mounts. If they aren't | |
388 | * triggered yet, we want to avoid triggering | |
389 | * them, as we don't make any guarantees for | |
390 | * future submounts anyway. If they are | |
391 | * already triggered, then we will find | |
392 | * another entry for this. */ | |
393 | if (streq(type, "autofs")) { | |
394 | top_autofs = top_autofs || path_equal(cleaned, p); | |
395 | continue; | |
396 | } | |
397 | ||
398 | if (path_startswith(p, cleaned) && | |
399 | !set_contains(done, p)) { | |
400 | ||
401 | r = set_consume(todo, p); | |
402 | p = NULL; | |
403 | ||
404 | if (r == -EEXIST) | |
405 | continue; | |
406 | if (r < 0) | |
407 | return r; | |
408 | } | |
409 | } | |
410 | ||
411 | /* If we have no submounts to process anymore and if | |
412 | * the root is either already done, or an autofs, we | |
413 | * are done */ | |
414 | if (set_isempty(todo) && | |
415 | (top_autofs || set_contains(done, cleaned))) | |
416 | return 0; | |
417 | ||
418 | if (!set_contains(done, cleaned) && | |
419 | !set_contains(todo, cleaned)) { | |
420 | /* The prefix directory itself is not yet a | |
421 | * mount, make it one. */ | |
422 | if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0) | |
423 | return -errno; | |
424 | ||
425 | orig_flags = 0; | |
426 | (void) get_mount_flags(cleaned, &orig_flags); | |
427 | orig_flags &= ~MS_RDONLY; | |
428 | ||
429 | if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) | |
430 | return -errno; | |
431 | ||
432 | x = strdup(cleaned); | |
433 | if (!x) | |
434 | return -ENOMEM; | |
435 | ||
436 | r = set_consume(done, x); | |
437 | if (r < 0) | |
438 | return r; | |
439 | } | |
440 | ||
441 | while ((x = set_steal_first(todo))) { | |
442 | ||
443 | r = set_consume(done, x); | |
444 | if (r == -EEXIST || r == 0) | |
445 | continue; | |
446 | if (r < 0) | |
447 | return r; | |
448 | ||
449 | /* Try to reuse the original flag set, but | |
450 | * don't care for errors, in case of | |
451 | * obstructed mounts */ | |
452 | orig_flags = 0; | |
453 | (void) get_mount_flags(x, &orig_flags); | |
454 | orig_flags &= ~MS_RDONLY; | |
455 | ||
456 | if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) { | |
457 | ||
458 | /* Deal with mount points that are | |
459 | * obstructed by a later mount */ | |
460 | ||
461 | if (errno != ENOENT) | |
462 | return -errno; | |
463 | } | |
464 | ||
465 | } | |
466 | } | |
467 | } | |
468 | ||
/* Makes the file system mounted at path the new root of the calling process:
 * changes into path, moves that mount onto "/" with MS_MOVE, chroots into the
 * current directory and finally changes the working directory to the new "/".
 *
 * Returns 0 on success, or -errno of the first step that failed; later steps
 * are not attempted in that case. */
int mount_move_root(const char *path) {
        assert(path);

        /* The || chain short-circuits, so the steps run strictly in order and
         * errno still belongs to the call that failed. */
        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}