]>
Commit | Line | Data |
---|---|---|
4349cd7c LP |
1 | /*** |
2 | This file is part of systemd. | |
3 | ||
4 | Copyright 2010 Lennart Poettering | |
5 | ||
6 | systemd is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU Lesser General Public License as published by | |
8 | the Free Software Foundation; either version 2.1 of the License, or | |
9 | (at your option) any later version. | |
10 | ||
11 | systemd is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public License | |
17 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
18 | ***/ | |
19 | ||
11c3a366 TA |
20 | #include <errno.h> |
21 | #include <stdlib.h> | |
4349cd7c LP |
22 | #include <string.h> |
23 | #include <sys/mount.h> | |
11c3a366 | 24 | #include <sys/stat.h> |
4349cd7c | 25 | #include <sys/statvfs.h> |
11c3a366 | 26 | #include <unistd.h> |
4349cd7c | 27 | |
b5efdb8a | 28 | #include "alloc-util.h" |
4349cd7c LP |
29 | #include "escape.h" |
30 | #include "fd-util.h" | |
31 | #include "fileio.h" | |
93cc7779 | 32 | #include "hashmap.h" |
4349cd7c LP |
33 | #include "mount-util.h" |
34 | #include "parse-util.h" | |
35 | #include "path-util.h" | |
36 | #include "set.h" | |
15a5e950 | 37 | #include "stdio-util.h" |
4349cd7c | 38 | #include "string-util.h" |
4349cd7c LP |
39 | |
40 | static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) { | |
41 | char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)]; | |
42 | _cleanup_free_ char *fdinfo = NULL; | |
43 | _cleanup_close_ int subfd = -1; | |
44 | char *p; | |
45 | int r; | |
46 | ||
47 | if ((flags & AT_EMPTY_PATH) && isempty(filename)) | |
48 | xsprintf(path, "/proc/self/fdinfo/%i", fd); | |
49 | else { | |
c4b69156 | 50 | subfd = openat(fd, filename, O_CLOEXEC|O_PATH); |
4349cd7c LP |
51 | if (subfd < 0) |
52 | return -errno; | |
53 | ||
54 | xsprintf(path, "/proc/self/fdinfo/%i", subfd); | |
55 | } | |
56 | ||
57 | r = read_full_file(path, &fdinfo, NULL); | |
58 | if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */ | |
59 | return -EOPNOTSUPP; | |
60 | if (r < 0) | |
61 | return -errno; | |
62 | ||
63 | p = startswith(fdinfo, "mnt_id:"); | |
64 | if (!p) { | |
65 | p = strstr(fdinfo, "\nmnt_id:"); | |
66 | if (!p) /* The mnt_id field is a relatively new addition */ | |
67 | return -EOPNOTSUPP; | |
68 | ||
69 | p += 8; | |
70 | } | |
71 | ||
72 | p += strspn(p, WHITESPACE); | |
73 | p[strcspn(p, WHITESPACE)] = 0; | |
74 | ||
75 | return safe_atoi(p, mnt_id); | |
76 | } | |
77 | ||
78 | ||
79 | int fd_is_mount_point(int fd, const char *filename, int flags) { | |
80 | union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT; | |
81 | int mount_id = -1, mount_id_parent = -1; | |
82 | bool nosupp = false, check_st_dev = true; | |
83 | struct stat a, b; | |
84 | int r; | |
85 | ||
86 | assert(fd >= 0); | |
87 | assert(filename); | |
88 | ||
89 | /* First we will try the name_to_handle_at() syscall, which | |
90 | * tells us the mount id and an opaque file "handle". It is | |
91 | * not supported everywhere though (kernel compile-time | |
92 | * option, not all file systems are hooked up). If it works | |
93 | * the mount id is usually good enough to tell us whether | |
94 | * something is a mount point. | |
95 | * | |
96 | * If that didn't work we will try to read the mount id from | |
97 | * /proc/self/fdinfo/<fd>. This is almost as good as | |
98 | * name_to_handle_at(), however, does not return the | |
99 | * opaque file handle. The opaque file handle is pretty useful | |
100 | * to detect the root directory, which we should always | |
101 | * consider a mount point. Hence we use this only as | |
102 | * fallback. Exporting the mnt_id in fdinfo is a pretty recent | |
103 | * kernel addition. | |
104 | * | |
105 | * As last fallback we do traditional fstat() based st_dev | |
106 | * comparisons. This is how things were traditionally done, | |
61233823 | 107 | * but unionfs breaks this since it exposes file |
4349cd7c LP |
108 | * systems with a variety of st_dev reported. Also, btrfs |
109 | * subvolumes have different st_dev, even though they aren't | |
110 | * real mounts of their own. */ | |
111 | ||
112 | r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags); | |
113 | if (r < 0) { | |
114 | if (errno == ENOSYS) | |
115 | /* This kernel does not support name_to_handle_at() | |
116 | * fall back to simpler logic. */ | |
117 | goto fallback_fdinfo; | |
118 | else if (errno == EOPNOTSUPP) | |
119 | /* This kernel or file system does not support | |
120 | * name_to_handle_at(), hence let's see if the | |
121 | * upper fs supports it (in which case it is a | |
122 | * mount point), otherwise fallback to the | |
123 | * traditional stat() logic */ | |
124 | nosupp = true; | |
125 | else | |
126 | return -errno; | |
127 | } | |
128 | ||
129 | r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH); | |
130 | if (r < 0) { | |
131 | if (errno == EOPNOTSUPP) { | |
132 | if (nosupp) | |
133 | /* Neither parent nor child do name_to_handle_at()? | |
134 | We have no choice but to fall back. */ | |
135 | goto fallback_fdinfo; | |
136 | else | |
137 | /* The parent can't do name_to_handle_at() but the | |
138 | * directory we are interested in can? | |
139 | * If so, it must be a mount point. */ | |
140 | return 1; | |
141 | } else | |
142 | return -errno; | |
143 | } | |
144 | ||
145 | /* The parent can do name_to_handle_at() but the | |
146 | * directory we are interested in can't? If so, it | |
147 | * must be a mount point. */ | |
148 | if (nosupp) | |
149 | return 1; | |
150 | ||
151 | /* If the file handle for the directory we are | |
152 | * interested in and its parent are identical, we | |
153 | * assume this is the root directory, which is a mount | |
154 | * point. */ | |
155 | ||
156 | if (h.handle.handle_bytes == h_parent.handle.handle_bytes && | |
157 | h.handle.handle_type == h_parent.handle.handle_type && | |
158 | memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0) | |
159 | return 1; | |
160 | ||
161 | return mount_id != mount_id_parent; | |
162 | ||
163 | fallback_fdinfo: | |
164 | r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id); | |
165 | if (r == -EOPNOTSUPP) | |
166 | goto fallback_fstat; | |
167 | if (r < 0) | |
168 | return r; | |
169 | ||
170 | r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent); | |
171 | if (r < 0) | |
172 | return r; | |
173 | ||
174 | if (mount_id != mount_id_parent) | |
175 | return 1; | |
176 | ||
177 | /* Hmm, so, the mount ids are the same. This leaves one | |
178 | * special case though for the root file system. For that, | |
179 | * let's see if the parent directory has the same inode as we | |
180 | * are interested in. Hence, let's also do fstat() checks now, | |
181 | * too, but avoid the st_dev comparisons, since they aren't | |
182 | * that useful on unionfs mounts. */ | |
183 | check_st_dev = false; | |
184 | ||
185 | fallback_fstat: | |
186 | /* yay for fstatat() taking a different set of flags than the other | |
187 | * _at() above */ | |
188 | if (flags & AT_SYMLINK_FOLLOW) | |
189 | flags &= ~AT_SYMLINK_FOLLOW; | |
190 | else | |
191 | flags |= AT_SYMLINK_NOFOLLOW; | |
192 | if (fstatat(fd, filename, &a, flags) < 0) | |
193 | return -errno; | |
194 | ||
195 | if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0) | |
196 | return -errno; | |
197 | ||
198 | /* A directory with same device and inode as its parent? Must | |
199 | * be the root directory */ | |
200 | if (a.st_dev == b.st_dev && | |
201 | a.st_ino == b.st_ino) | |
202 | return 1; | |
203 | ||
204 | return check_st_dev && (a.st_dev != b.st_dev); | |
205 | } | |
206 | ||
207 | /* flags can be AT_SYMLINK_FOLLOW or 0 */ | |
208 | int path_is_mount_point(const char *t, int flags) { | |
209 | _cleanup_close_ int fd = -1; | |
210 | _cleanup_free_ char *canonical = NULL, *parent = NULL; | |
211 | ||
212 | assert(t); | |
213 | ||
214 | if (path_equal(t, "/")) | |
215 | return 1; | |
216 | ||
217 | /* we need to resolve symlinks manually, we can't just rely on | |
218 | * fd_is_mount_point() to do that for us; if we have a structure like | |
219 | * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we | |
220 | * look at needs to be /usr, not /. */ | |
221 | if (flags & AT_SYMLINK_FOLLOW) { | |
222 | canonical = canonicalize_file_name(t); | |
223 | if (!canonical) | |
224 | return -errno; | |
225 | ||
226 | t = canonical; | |
227 | } | |
228 | ||
229 | parent = dirname_malloc(t); | |
230 | if (!parent) | |
231 | return -ENOMEM; | |
232 | ||
c4b69156 | 233 | fd = openat(AT_FDCWD, parent, O_DIRECTORY|O_CLOEXEC|O_PATH); |
4349cd7c LP |
234 | if (fd < 0) |
235 | return -errno; | |
236 | ||
237 | return fd_is_mount_point(fd, basename(t), flags); | |
238 | } | |
239 | ||
240 | int umount_recursive(const char *prefix, int flags) { | |
241 | bool again; | |
242 | int n = 0, r; | |
243 | ||
244 | /* Try to umount everything recursively below a | |
245 | * directory. Also, take care of stacked mounts, and keep | |
246 | * unmounting them until they are gone. */ | |
247 | ||
248 | do { | |
249 | _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; | |
250 | ||
251 | again = false; | |
252 | r = 0; | |
253 | ||
254 | proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); | |
255 | if (!proc_self_mountinfo) | |
256 | return -errno; | |
257 | ||
258 | for (;;) { | |
259 | _cleanup_free_ char *path = NULL, *p = NULL; | |
260 | int k; | |
261 | ||
262 | k = fscanf(proc_self_mountinfo, | |
263 | "%*s " /* (1) mount id */ | |
264 | "%*s " /* (2) parent id */ | |
265 | "%*s " /* (3) major:minor */ | |
266 | "%*s " /* (4) root */ | |
267 | "%ms " /* (5) mount point */ | |
268 | "%*s" /* (6) mount options */ | |
269 | "%*[^-]" /* (7) optional fields */ | |
270 | "- " /* (8) separator */ | |
271 | "%*s " /* (9) file system type */ | |
272 | "%*s" /* (10) mount source */ | |
273 | "%*s" /* (11) mount options 2 */ | |
274 | "%*[^\n]", /* some rubbish at the end */ | |
275 | &path); | |
276 | if (k != 1) { | |
277 | if (k == EOF) | |
278 | break; | |
279 | ||
280 | continue; | |
281 | } | |
282 | ||
283 | r = cunescape(path, UNESCAPE_RELAX, &p); | |
284 | if (r < 0) | |
285 | return r; | |
286 | ||
287 | if (!path_startswith(p, prefix)) | |
288 | continue; | |
289 | ||
290 | if (umount2(p, flags) < 0) { | |
291 | r = -errno; | |
292 | continue; | |
293 | } | |
294 | ||
295 | again = true; | |
296 | n++; | |
297 | ||
298 | break; | |
299 | } | |
300 | ||
301 | } while (again); | |
302 | ||
303 | return r ? r : n; | |
304 | } | |
305 | ||
/* Stores the f_flag field reported by statvfs() for 'path' in *flags.
 * Returns 0 on success, or -errno if statvfs() fails (in which case *flags is left untouched). */
static int get_mount_flags(const char *path, unsigned long *flags) {
        struct statvfs vfs;
        int rc;

        rc = statvfs(path, &vfs);
        if (rc < 0)
                return -errno;

        *flags = vfs.f_flag;
        return 0;
}
314 | ||
315 | int bind_remount_recursive(const char *prefix, bool ro) { | |
316 | _cleanup_set_free_free_ Set *done = NULL; | |
317 | _cleanup_free_ char *cleaned = NULL; | |
318 | int r; | |
319 | ||
320 | /* Recursively remount a directory (and all its submounts) | |
321 | * read-only or read-write. If the directory is already | |
322 | * mounted, we reuse the mount and simply mark it | |
323 | * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write | |
324 | * operation). If it isn't we first make it one. Afterwards we | |
325 | * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all | |
326 | * submounts we can access, too. When mounts are stacked on | |
327 | * the same mount point we only care for each individual | |
328 | * "top-level" mount on each point, as we cannot | |
329 | * influence/access the underlying mounts anyway. We do not | |
330 | * have any effect on future submounts that might get | |
331 | * propagated, they migt be writable. This includes future | |
332 | * submounts that have been triggered via autofs. */ | |
333 | ||
334 | cleaned = strdup(prefix); | |
335 | if (!cleaned) | |
336 | return -ENOMEM; | |
337 | ||
338 | path_kill_slashes(cleaned); | |
339 | ||
340 | done = set_new(&string_hash_ops); | |
341 | if (!done) | |
342 | return -ENOMEM; | |
343 | ||
344 | for (;;) { | |
345 | _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; | |
346 | _cleanup_set_free_free_ Set *todo = NULL; | |
347 | bool top_autofs = false; | |
348 | char *x; | |
349 | unsigned long orig_flags; | |
350 | ||
351 | todo = set_new(&string_hash_ops); | |
352 | if (!todo) | |
353 | return -ENOMEM; | |
354 | ||
355 | proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); | |
356 | if (!proc_self_mountinfo) | |
357 | return -errno; | |
358 | ||
359 | for (;;) { | |
360 | _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL; | |
361 | int k; | |
362 | ||
363 | k = fscanf(proc_self_mountinfo, | |
364 | "%*s " /* (1) mount id */ | |
365 | "%*s " /* (2) parent id */ | |
366 | "%*s " /* (3) major:minor */ | |
367 | "%*s " /* (4) root */ | |
368 | "%ms " /* (5) mount point */ | |
369 | "%*s" /* (6) mount options (superblock) */ | |
370 | "%*[^-]" /* (7) optional fields */ | |
371 | "- " /* (8) separator */ | |
372 | "%ms " /* (9) file system type */ | |
373 | "%*s" /* (10) mount source */ | |
374 | "%*s" /* (11) mount options (bind mount) */ | |
375 | "%*[^\n]", /* some rubbish at the end */ | |
376 | &path, | |
377 | &type); | |
378 | if (k != 2) { | |
379 | if (k == EOF) | |
380 | break; | |
381 | ||
382 | continue; | |
383 | } | |
384 | ||
385 | r = cunescape(path, UNESCAPE_RELAX, &p); | |
386 | if (r < 0) | |
387 | return r; | |
388 | ||
389 | /* Let's ignore autofs mounts. If they aren't | |
390 | * triggered yet, we want to avoid triggering | |
391 | * them, as we don't make any guarantees for | |
392 | * future submounts anyway. If they are | |
393 | * already triggered, then we will find | |
394 | * another entry for this. */ | |
395 | if (streq(type, "autofs")) { | |
396 | top_autofs = top_autofs || path_equal(cleaned, p); | |
397 | continue; | |
398 | } | |
399 | ||
400 | if (path_startswith(p, cleaned) && | |
401 | !set_contains(done, p)) { | |
402 | ||
403 | r = set_consume(todo, p); | |
404 | p = NULL; | |
405 | ||
406 | if (r == -EEXIST) | |
407 | continue; | |
408 | if (r < 0) | |
409 | return r; | |
410 | } | |
411 | } | |
412 | ||
413 | /* If we have no submounts to process anymore and if | |
414 | * the root is either already done, or an autofs, we | |
415 | * are done */ | |
416 | if (set_isempty(todo) && | |
417 | (top_autofs || set_contains(done, cleaned))) | |
418 | return 0; | |
419 | ||
420 | if (!set_contains(done, cleaned) && | |
421 | !set_contains(todo, cleaned)) { | |
422 | /* The prefix directory itself is not yet a | |
423 | * mount, make it one. */ | |
424 | if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0) | |
425 | return -errno; | |
426 | ||
427 | orig_flags = 0; | |
428 | (void) get_mount_flags(cleaned, &orig_flags); | |
429 | orig_flags &= ~MS_RDONLY; | |
430 | ||
431 | if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) | |
432 | return -errno; | |
433 | ||
434 | x = strdup(cleaned); | |
435 | if (!x) | |
436 | return -ENOMEM; | |
437 | ||
438 | r = set_consume(done, x); | |
439 | if (r < 0) | |
440 | return r; | |
441 | } | |
442 | ||
443 | while ((x = set_steal_first(todo))) { | |
444 | ||
445 | r = set_consume(done, x); | |
446 | if (r == -EEXIST || r == 0) | |
447 | continue; | |
448 | if (r < 0) | |
449 | return r; | |
450 | ||
451 | /* Try to reuse the original flag set, but | |
452 | * don't care for errors, in case of | |
453 | * obstructed mounts */ | |
454 | orig_flags = 0; | |
455 | (void) get_mount_flags(x, &orig_flags); | |
456 | orig_flags &= ~MS_RDONLY; | |
457 | ||
458 | if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) { | |
459 | ||
460 | /* Deal with mount points that are | |
461 | * obstructed by a later mount */ | |
462 | ||
463 | if (errno != ENOENT) | |
464 | return -errno; | |
465 | } | |
466 | ||
467 | } | |
468 | } | |
469 | } | |
470 | ||
/* Makes 'path' the new root directory: changes into it, moves the mount there onto "/" with
 * MS_MOVE, chroots into the current directory and finally changes to the new "/".
 *
 * Returns 0 on success, or the negative errno of the first step that failed. */
int mount_move_root(const char *path) {
        assert(path);

        /* The steps run strictly in this order; || stops at the first failing call, so errno
         * still belongs to that call when it is read below. */
        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}
4e036b7a LP |
488 | |
/* Returns true if 'fstype' is listed in the table of network file system types below. A leading
 * "fuse." is stripped before the lookup. */
bool fstype_is_network(const char *fstype) {
        static const char table[] =
                "afs\0"
                "cifs\0"
                "smbfs\0"
                "sshfs\0"
                "ncpfs\0"
                "ncp\0"
                "nfs\0"
                "nfs4\0"
                "gfs\0"
                "gfs2\0"
                "glusterfs\0"
                "pvfs2\0" /* OrangeFS */
                "ocfs2\0"
                ;

        const char *without_fuse;

        without_fuse = startswith(fstype, "fuse.");

        return nulstr_contains(table, without_fuse ? without_fuse : fstype);
}
3f2c0bec LP |
514 | |
/* Calls umount2() on 'path' with 'flags' over and over until it fails, which removes all mounts
 * stacked on that mount point.
 *
 * If the call that finally fails reports EINVAL, returns 1 if at least one unmount succeeded
 * and 0 if none did. Any other failure is returned as a negative errno. */
int repeat_unmount(const char *path, int flags) {
        bool unmounted_any = false;

        assert(path);

        while (umount2(path, flags) >= 0)
                unmounted_any = true;

        /* errno now belongs to the umount2() call that ended the loop */
        if (errno != EINVAL)
                return -errno;

        return unmounted_any;
}
c4b41707 AP |
535 | |
/* Maps the file type bits of 'mode' (mode & S_IFMT) to the path of the node of the same type
 * below /run/systemd/inaccessible/. Returns NULL for any file type not listed in the table. */
const char* mode_to_inaccessible_node(mode_t mode) {
        static const struct {
                mode_t type;
                const char *node;
        } nodes[] = {
                { S_IFREG,  "/run/systemd/inaccessible/reg"  },
                { S_IFDIR,  "/run/systemd/inaccessible/dir"  },
                { S_IFCHR,  "/run/systemd/inaccessible/chr"  },
                { S_IFBLK,  "/run/systemd/inaccessible/blk"  },
                { S_IFIFO,  "/run/systemd/inaccessible/fifo" },
                { S_IFSOCK, "/run/systemd/inaccessible/sock" },
        };
        const mode_t type = mode & S_IFMT;
        size_t i;

        for (i = 0; i < sizeof(nodes) / sizeof(nodes[0]); i++)
                if (nodes[i].type == type)
                        return nodes[i].node;

        return NULL;
}