]>
Commit | Line | Data |
---|---|---|
1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ | |
2 | /*** | |
3 | Copyright © 2010 ProFUSION embedded systems | |
4 | ***/ | |
5 | ||
6 | #include <fcntl.h> | |
7 | #include <sys/mount.h> | |
8 | #include <unistd.h> | |
9 | ||
10 | #include "alloc-util.h" | |
11 | #include "chase.h" | |
12 | #include "constants.h" | |
13 | #include "dirent-util.h" | |
14 | #include "errno-util.h" | |
15 | #include "fd-util.h" | |
16 | #include "fileio.h" | |
17 | #include "format-util.h" | |
18 | #include "fs-util.h" | |
19 | #include "fstab-util.h" | |
20 | #include "libmount-util.h" | |
21 | #include "log.h" | |
22 | #include "mkdir.h" | |
23 | #include "mount-setup.h" | |
24 | #include "mount-util.h" | |
25 | #include "mountpoint-util.h" | |
26 | #include "parse-util.h" | |
27 | #include "process-util.h" | |
28 | #include "random-util.h" | |
29 | #include "signal-util.h" | |
30 | #include "stat-util.h" | |
31 | #include "string-util.h" | |
32 | #include "umount.h" | |
33 | #include "virt.h" | |
34 | ||
35 | static void mount_point_free(MountPoint **head, MountPoint *m) { | |
36 | assert(head); | |
37 | assert(m); | |
38 | ||
39 | LIST_REMOVE(mount_point, *head, m); | |
40 | ||
41 | free(m->path); | |
42 | free(m->remount_options); | |
43 | free(m); | |
44 | } | |
45 | ||
46 | void mount_points_list_free(MountPoint **head) { | |
47 | assert(head); | |
48 | ||
49 | while (*head) | |
50 | mount_point_free(head, *head); | |
51 | } | |
52 | ||
53 | int mount_points_list_get(FILE *f, MountPoint **head) { | |
54 | _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL; | |
55 | _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL; | |
56 | int r; | |
57 | ||
58 | assert(head); | |
59 | ||
60 | r = libmount_parse_mountinfo(f, &table, &iter); | |
61 | if (r < 0) | |
62 | return log_error_errno(r, "Failed to parse /proc/self/mountinfo: %m"); | |
63 | ||
64 | for (;;) { | |
65 | _cleanup_free_ char *options = NULL, *remount_options = NULL; | |
66 | struct libmnt_fs *fs; | |
67 | const char *path, *fstype; | |
68 | unsigned long remount_flags = 0u; | |
69 | bool try_remount_ro, is_api_vfs, is_network; | |
70 | _cleanup_free_ MountPoint *m = NULL; | |
71 | ||
72 | r = mnt_table_next_fs(table, iter, &fs); | |
73 | if (r == 1) /* EOF */ | |
74 | break; | |
75 | if (r < 0) | |
76 | return log_error_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m"); | |
77 | ||
78 | path = mnt_fs_get_target(fs); | |
79 | if (!path) | |
80 | continue; | |
81 | ||
82 | fstype = mnt_fs_get_fstype(fs); | |
83 | ||
84 | /* Combine the generic VFS options with the FS-specific options. Duplicates are not a problem | |
85 | * here, because the only options that should come up twice are typically ro/rw, which are | |
86 | * turned into MS_RDONLY or the inversion of it. | |
87 | * | |
88 | * Even if there are duplicates later in mount_option_mangle() they shouldn't hurt anyways as | |
89 | * they override each other. */ | |
90 | if (!strextend_with_separator(&options, ",", mnt_fs_get_vfs_options(fs))) | |
91 | return log_oom(); | |
92 | if (!strextend_with_separator(&options, ",", mnt_fs_get_fs_options(fs))) | |
93 | return log_oom(); | |
94 | ||
95 | /* Ignore mount points we can't unmount because they are API or because we are keeping them | |
96 | * open (like /dev/console). Also, ignore all mounts below API file systems, since they are | |
97 | * likely virtual too, and hence not worth spending time on. Also, in unprivileged containers | |
98 | * we might lack the rights to unmount these things, hence don't bother. */ | |
99 | if (mount_point_is_api(path) || | |
100 | mount_point_ignore(path) || | |
101 | path_below_api_vfs(path)) | |
102 | continue; | |
103 | ||
104 | is_network = fstype_is_network(fstype); | |
105 | is_api_vfs = fstype_is_api_vfs(fstype); | |
106 | ||
107 | /* If we are in a container, don't attempt to read-only mount anything as that brings no real | |
108 | * benefits, but might confuse the host, as we remount the superblock here, not the bind | |
109 | * mount. | |
110 | * | |
111 | * If the filesystem is a network fs, also skip the remount. It brings no value (we cannot | |
112 | * leave a "dirty fs") and could hang if the network is down. Note that umount2() is more | |
113 | * careful and will not hang because of the network being down. */ | |
114 | try_remount_ro = detect_container() <= 0 && | |
115 | !is_network && | |
116 | !is_api_vfs && | |
117 | !fstype_is_ro(fstype) && | |
118 | !fstab_test_yes_no_option(options, "ro\0rw\0"); | |
119 | ||
120 | if (try_remount_ro) { | |
121 | /* mount(2) states that mount flags and options need to be exactly the same as they | |
122 | * were when the filesystem was mounted, except for the desired changes. So we | |
123 | * reconstruct both here and adjust them for the later remount call too. */ | |
124 | ||
125 | r = mnt_fs_get_propagation(fs, &remount_flags); | |
126 | if (r < 0) { | |
127 | log_warning_errno(r, "mnt_fs_get_propagation() failed for %s, ignoring: %m", path); | |
128 | continue; | |
129 | } | |
130 | ||
131 | r = mount_option_mangle(options, remount_flags, &remount_flags, &remount_options); | |
132 | if (r < 0) { | |
133 | log_warning_errno(r, "mount_option_mangle failed for %s, ignoring: %m", path); | |
134 | continue; | |
135 | } | |
136 | ||
137 | /* MS_BIND is special. If it is provided it will only make the mount-point | |
138 | * read-only. If left out, the super block itself is remounted, which we want. */ | |
139 | remount_flags = (remount_flags|MS_REMOUNT|MS_RDONLY) & ~MS_BIND; | |
140 | } | |
141 | ||
142 | m = new(MountPoint, 1); | |
143 | if (!m) | |
144 | return log_oom(); | |
145 | ||
146 | r = libmount_is_leaf(table, fs); | |
147 | if (r < 0) | |
148 | return log_error_errno(r, "Failed to get children mounts for %s from /proc/self/mountinfo: %m", path); | |
149 | bool leaf = r; | |
150 | ||
151 | *m = (MountPoint) { | |
152 | .remount_options = remount_options, | |
153 | .remount_flags = remount_flags, | |
154 | .try_remount_ro = try_remount_ro, | |
155 | ||
156 | /* Unmount sysfs/procfs/… lazily, since syncing doesn't matter there, and it's OK if | |
157 | * something keeps an fd open to it. */ | |
158 | .umount_lazily = is_api_vfs, | |
159 | ||
160 | /* If a mount point is not a leaf, moving it would invalidate our mount table. | |
161 | * If a mount point is on the network and the network is down, it can hang and block | |
162 | * the shutdown. */ | |
163 | .umount_move_if_busy = leaf && !is_network, | |
164 | }; | |
165 | ||
166 | m->path = strdup(path); | |
167 | if (!m->path) | |
168 | return log_oom(); | |
169 | ||
170 | TAKE_PTR(remount_options); | |
171 | ||
172 | LIST_PREPEND(mount_point, *head, TAKE_PTR(m)); | |
173 | } | |
174 | ||
175 | return 0; | |
176 | } | |
177 | ||
/* Tells whether the given mount point is one we never try to unmount: "/" and "/usr" themselves, and
 * anything at or below "/run/initramfs". */
static bool nonunmountable_path(const char *path) {
        assert(path);

        if (PATH_IN_SET(path, "/", "/usr"))
                return true;

        return !!path_startswith(path, "/run/initramfs");
}
184 | ||
185 | static void log_umount_blockers(const char *mnt) { | |
186 | _cleanup_free_ char *blockers = NULL; | |
187 | int r; | |
188 | ||
189 | _cleanup_closedir_ DIR *dir = opendir("/proc"); | |
190 | if (!dir) | |
191 | return (void) log_warning_errno(errno, "Failed to open %s: %m", "/proc/"); | |
192 | ||
193 | FOREACH_DIRENT_ALL(de, dir, break) { | |
194 | if (!IN_SET(de->d_type, DT_DIR, DT_UNKNOWN)) | |
195 | continue; | |
196 | ||
197 | pid_t pid; | |
198 | if (parse_pid(de->d_name, &pid) < 0) | |
199 | continue; | |
200 | ||
201 | _cleanup_free_ char *fdp = path_join(de->d_name, "fd"); | |
202 | if (!fdp) | |
203 | return (void) log_oom(); | |
204 | ||
205 | _cleanup_closedir_ DIR *fd_dir = xopendirat(dirfd(dir), fdp, 0); | |
206 | if (!fd_dir) { | |
207 | if (errno != ENOENT) /* process gone by now? */ | |
208 | log_debug_errno(errno, "Failed to open /proc/%s/, ignoring: %m",fdp); | |
209 | continue; | |
210 | } | |
211 | ||
212 | bool culprit = false; | |
213 | FOREACH_DIRENT(fd_de, fd_dir, break) { | |
214 | _cleanup_free_ char *open_file = NULL; | |
215 | ||
216 | r = readlinkat_malloc(dirfd(fd_dir), fd_de->d_name, &open_file); | |
217 | if (r < 0) { | |
218 | if (r != -ENOENT) /* fd closed by now */ | |
219 | log_debug_errno(r, "Failed to read link /proc/%s/%s, ignoring: %m", fdp, fd_de->d_name); | |
220 | continue; | |
221 | } | |
222 | ||
223 | if (path_startswith(open_file, mnt)) { | |
224 | culprit = true; | |
225 | break; | |
226 | } | |
227 | } | |
228 | ||
229 | if (!culprit) | |
230 | continue; | |
231 | ||
232 | _cleanup_free_ char *comm = NULL; | |
233 | r = pid_get_comm(pid, &comm); | |
234 | if (r < 0) { | |
235 | if (r != -ESRCH) /* process gone by now */ | |
236 | log_debug_errno(r, "Failed to read process name of PID " PID_FMT ": %m", pid); | |
237 | continue; | |
238 | } | |
239 | ||
240 | if (!strextend_with_separator(&blockers, ", ", comm)) | |
241 | return (void) log_oom(); | |
242 | ||
243 | if (!strextend(&blockers, "(", de->d_name, ")")) | |
244 | return (void) log_oom(); | |
245 | } | |
246 | ||
247 | if (blockers) | |
248 | log_warning("Unmounting '%s' blocked by: %s", mnt, blockers); | |
249 | } | |
250 | ||
251 | static int remount_with_timeout(MountPoint *m, bool last_try) { | |
252 | _cleanup_close_pair_ int pfd[2] = EBADF_PAIR; | |
253 | _cleanup_(sigkill_nowaitp) pid_t pid = 0; | |
254 | int r; | |
255 | ||
256 | BLOCK_SIGNALS(SIGCHLD); | |
257 | ||
258 | assert(m); | |
259 | ||
260 | r = pipe2(pfd, O_CLOEXEC|O_NONBLOCK); | |
261 | if (r < 0) | |
262 | return r; | |
263 | ||
264 | /* Due to the possibility of a remount operation hanging, we fork a child process and set a | |
265 | * timeout. If the timeout lapses, the assumption is that the particular remount failed. */ | |
266 | r = safe_fork_full("(sd-remount)", | |
267 | NULL, | |
268 | pfd, ELEMENTSOF(pfd), | |
269 | FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_REOPEN_LOG, &pid); | |
270 | if (r < 0) | |
271 | return r; | |
272 | if (r == 0) { | |
273 | pfd[0] = safe_close(pfd[0]); | |
274 | ||
275 | log_info("Remounting '%s' read-only with options '%s'.", m->path, strempty(m->remount_options)); | |
276 | ||
277 | /* Start the mount operation here in the child */ | |
278 | r = mount(NULL, m->path, NULL, m->remount_flags, m->remount_options); | |
279 | if (r < 0) | |
280 | log_full_errno(last_try ? LOG_ERR : LOG_INFO, | |
281 | errno, | |
282 | "Failed to remount '%s' read-only: %m", | |
283 | m->path); | |
284 | ||
285 | report_errno_and_exit(pfd[1], r); | |
286 | } | |
287 | ||
288 | pfd[1] = safe_close(pfd[1]); | |
289 | ||
290 | r = wait_for_terminate_with_timeout(pid, DEFAULT_TIMEOUT_USEC); | |
291 | if (r == -ETIMEDOUT) | |
292 | log_error_errno(r, "Remounting '%s' timed out, issuing SIGKILL to PID " PID_FMT ".", m->path, pid); | |
293 | else if (r == -EPROTO) { | |
294 | /* Try to read error code from child */ | |
295 | if (read(pfd[0], &r, sizeof(r)) == sizeof(r)) | |
296 | log_debug_errno(r, "Remounting '%s' failed abnormally, child process " PID_FMT " failed: %m", m->path, pid); | |
297 | else | |
298 | r = log_debug_errno(EPROTO, "Remounting '%s' failed abnormally, child process " PID_FMT " aborted or exited non-zero.", m->path, pid); | |
299 | TAKE_PID(pid); /* child exited (just not as we expected) hence don't kill anymore */ | |
300 | } else if (r < 0) | |
301 | log_error_errno(r, "Remounting '%s' failed unexpectedly, couldn't wait for child process " PID_FMT ": %m", m->path, pid); | |
302 | ||
303 | return r; | |
304 | } | |
305 | ||
306 | static int umount_with_timeout(MountPoint *m, bool last_try) { | |
307 | _cleanup_close_pair_ int pfd[2] = EBADF_PAIR; | |
308 | _cleanup_(sigkill_nowaitp) pid_t pid = 0; | |
309 | int r; | |
310 | ||
311 | BLOCK_SIGNALS(SIGCHLD); | |
312 | ||
313 | assert(m); | |
314 | ||
315 | r = pipe2(pfd, O_CLOEXEC|O_NONBLOCK); | |
316 | if (r < 0) | |
317 | return r; | |
318 | ||
319 | /* Due to the possibility of a umount operation hanging, we fork a child process and set a | |
320 | * timeout. If the timeout lapses, the assumption is that the particular umount failed. */ | |
321 | r = safe_fork_full("(sd-umount)", | |
322 | NULL, | |
323 | pfd, ELEMENTSOF(pfd), | |
324 | FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_REOPEN_LOG, &pid); | |
325 | if (r < 0) | |
326 | return r; | |
327 | if (r == 0) { | |
328 | pfd[0] = safe_close(pfd[0]); | |
329 | ||
330 | log_info("Unmounting '%s'.", m->path); | |
331 | ||
332 | /* Start the mount operation here in the child Using MNT_FORCE causes some filesystems | |
333 | * (e.g. FUSE and NFS and other network filesystems) to abort any pending requests and return | |
334 | * -EIO rather than blocking indefinitely. If the filesysten is "busy", this may allow | |
335 | * processes to die, thus making the filesystem less busy so the unmount might succeed | |
336 | * (rather than return EBUSY). */ | |
337 | r = RET_NERRNO(umount2(m->path, | |
338 | UMOUNT_NOFOLLOW | /* Don't follow symlinks: this should never happen unless our mount list was wrong */ | |
339 | (m->umount_lazily ? MNT_DETACH : MNT_FORCE))); | |
340 | if (r < 0) { | |
341 | log_full_errno(last_try ? LOG_ERR : LOG_INFO, r, "Failed to unmount %s: %m", m->path); | |
342 | ||
343 | if (r == -EBUSY && last_try) | |
344 | log_umount_blockers(m->path); | |
345 | } | |
346 | ||
347 | report_errno_and_exit(pfd[1], r); | |
348 | } | |
349 | ||
350 | pfd[1] = safe_close(pfd[1]); | |
351 | ||
352 | r = wait_for_terminate_with_timeout(pid, DEFAULT_TIMEOUT_USEC); | |
353 | if (r == -ETIMEDOUT) | |
354 | log_error_errno(r, "Unmounting '%s' timed out, issuing SIGKILL to PID " PID_FMT ".", m->path, pid); | |
355 | else if (r == -EPROTO) { | |
356 | /* Try to read error code from child */ | |
357 | if (read(pfd[0], &r, sizeof(r)) == sizeof(r)) | |
358 | log_debug_errno(r, "Unmounting '%s' failed abnormally, child process " PID_FMT " failed: %m", m->path, pid); | |
359 | else | |
360 | r = log_debug_errno(EPROTO, "Unmounting '%s' failed abnormally, child process " PID_FMT " aborted or exited non-zero.", m->path, pid); | |
361 | TAKE_PID(pid); /* It died, but abnormally, no purpose in killing */ | |
362 | } else if (r < 0) | |
363 | log_error_errno(r, "Unmounting '%s' failed unexpectedly, couldn't wait for child process " PID_FMT ": %m", m->path, pid); | |
364 | ||
365 | return r; | |
366 | } | |
367 | ||
368 | /* This includes remounting readonly, which changes the kernel mount options. Therefore the list passed to | |
369 | * this function is invalidated, and should not be reused. */ | |
370 | static int mount_points_list_umount(MountPoint **head, bool *changed, bool last_try) { | |
371 | int n_failed = 0, r; | |
372 | _cleanup_free_ char *resolved_mounts_path = NULL; | |
373 | ||
374 | assert(head); | |
375 | assert(changed); | |
376 | ||
377 | LIST_FOREACH(mount_point, m, *head) { | |
378 | if (m->try_remount_ro) { | |
379 | /* We always try to remount directories read-only first, before we go on and umount | |
380 | * them. | |
381 | * | |
382 | * Mount points can be stacked. If a mount point is stacked below / or /usr, we | |
383 | * cannot umount or remount it directly, since there is no way to refer to the | |
384 | * underlying mount. There's nothing we can do about it for the general case, but we | |
385 | * can do something about it if it is aliased somewhere else via a bind mount. If we | |
386 | * explicitly remount the super block of that alias read-only we hence should be | |
387 | * relatively safe regarding keeping a dirty fs we cannot otherwise see. | |
388 | * | |
389 | * Since the remount can hang in the instance of remote filesystems, we remount | |
390 | * asynchronously and skip the subsequent umount if it fails. */ | |
391 | if (remount_with_timeout(m, last_try) < 0) { | |
392 | /* Remount failed, but try unmounting anyway, | |
393 | * unless this is a mount point we want to skip. */ | |
394 | if (nonunmountable_path(m->path)) { | |
395 | n_failed++; | |
396 | continue; | |
397 | } | |
398 | } | |
399 | } | |
400 | ||
401 | /* Skip / and /usr since we cannot unmount that anyway, since we are running from it. They | |
402 | * have already been remounted ro. */ | |
403 | if (nonunmountable_path(m->path)) | |
404 | continue; | |
405 | ||
406 | /* Trying to umount */ | |
407 | r = umount_with_timeout(m, last_try); | |
408 | if (r < 0) | |
409 | n_failed++; | |
410 | else | |
411 | *changed = true; | |
412 | ||
413 | /* If a mount is busy, we move it to not keep parent mount points busy. | |
414 | * More moving will occur in next iteration with a fresh mount table. | |
415 | */ | |
416 | if (r != -EBUSY || !m->umount_move_if_busy) | |
417 | continue; | |
418 | ||
419 | _cleanup_free_ char *dirname = NULL; | |
420 | ||
421 | r = path_extract_directory(m->path, &dirname); | |
422 | if (r < 0) { | |
423 | n_failed++; | |
424 | log_full_errno(last_try ? LOG_ERR : LOG_INFO, r, "Cannot find directory for %s: %m", m->path); | |
425 | continue; | |
426 | } | |
427 | ||
428 | /* We need to canonicalize /run/shutdown/mounts. We cannot compare inodes, since /run | |
429 | * might be bind mounted somewhere we want to unmount. And we need to move all mounts in | |
430 | * /run/shutdown/mounts from there. | |
431 | */ | |
432 | if (!resolved_mounts_path) | |
433 | (void) chase("/run/shutdown/mounts", NULL, 0, &resolved_mounts_path, NULL); | |
434 | if (!path_equal(dirname, resolved_mounts_path)) { | |
435 | char newpath[STRLEN("/run/shutdown/mounts/") + 16 + 1]; | |
436 | ||
437 | xsprintf(newpath, "/run/shutdown/mounts/%016" PRIx64, random_u64()); | |
438 | ||
439 | /* on error of is_dir, assume directory */ | |
440 | if (is_dir(m->path, true) != 0) { | |
441 | r = mkdir_p(newpath, 0000); | |
442 | if (r < 0) { | |
443 | log_full_errno(last_try ? LOG_ERR : LOG_INFO, r, "Could not create directory %s: %m", newpath); | |
444 | continue; | |
445 | } | |
446 | } else { | |
447 | r = touch_file(newpath, /* parents= */ true, USEC_INFINITY, UID_INVALID, GID_INVALID, 0700); | |
448 | if (r < 0) { | |
449 | log_full_errno(last_try ? LOG_ERR : LOG_INFO, r, "Could not create file %s: %m", newpath); | |
450 | continue; | |
451 | } | |
452 | } | |
453 | ||
454 | log_info("Moving mount %s to %s.", m->path, newpath); | |
455 | ||
456 | r = RET_NERRNO(mount(m->path, newpath, NULL, MS_MOVE, NULL)); | |
457 | if (r < 0) { | |
458 | n_failed++; | |
459 | log_full_errno(last_try ? LOG_ERR : LOG_INFO, r, "Could not move %s to %s: %m", m->path, newpath); | |
460 | } else | |
461 | *changed = true; | |
462 | } | |
463 | } | |
464 | ||
465 | return n_failed; | |
466 | } | |
467 | ||
468 | static int umount_all_once(bool *changed, bool last_try) { | |
469 | _cleanup_(mount_points_list_free) LIST_HEAD(MountPoint, mp_list_head); | |
470 | int r; | |
471 | ||
472 | assert(changed); | |
473 | ||
474 | LIST_HEAD_INIT(mp_list_head); | |
475 | r = mount_points_list_get(NULL, &mp_list_head); | |
476 | if (r < 0) | |
477 | return r; | |
478 | ||
479 | return mount_points_list_umount(&mp_list_head, changed, last_try); | |
480 | } | |
481 | ||
/* Runs unmount passes until a pass changes nothing anymore. *changed is set to true if any pass made
 * a change and is left untouched otherwise. Returns the result of the final pass. */
int umount_all(bool *changed, bool last_try) {
        int r;

        assert(changed);

        /* Mounts are processed in order, newest first. Repeating the pass is needed when an old mount
         * has been moved to a path inside a newer mount. */
        for (;;) {
                bool progressed = false;

                r = umount_all_once(&progressed, last_try);
                if (!progressed)
                        break;

                *changed = true;
        }

        return r;
}