1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
3 Copyright © 2010 ProFUSION embedded systems
10 #include "alloc-util.h"
12 #include "constants.h"
13 #include "dirent-util.h"
14 #include "errno-util.h"
17 #include "format-util.h"
19 #include "fstab-util.h"
20 #include "libmount-util.h"
23 #include "mount-setup.h"
24 #include "mount-util.h"
25 #include "mountpoint-util.h"
26 #include "parse-util.h"
27 #include "process-util.h"
28 #include "random-util.h"
29 #include "signal-util.h"
30 #include "stat-util.h"
31 #include "string-util.h"
/* Unlinks the entry 'm' from the mount point list rooted at '*head' and releases the
 * remount_options string it holds.
 *
 * NOTE(review): this listing skips lines of the original file, so any further cleanup of 'm'
 * is not visible here — confirm against the complete source. */
35 static void mount_point_free(MountPoint
**head
, MountPoint
*m
) {
39 LIST_REMOVE(mount_point
, *head
, m
);
42 free(m
->remount_options
);
/* Releases the mount point list rooted at '*head' by handing the current first entry
 * ('*head') to mount_point_free().
 *
 * NOTE(review): presumably this call is repeated until the list is empty; the loop construct is
 * not part of the visible lines — verify. */
46 void mount_points_list_free(MountPoint
**head
) {
50 mount_point_free(head
, *head
);
/* Builds the list of mount points to process, prepending one MountPoint per accepted mount table
 * entry to '*head'.
 *
 * 'f' is passed through to libmount_parse_mountinfo(); the one caller visible in this file
 * passes NULL. For each entry obtained via mnt_table_next_fs() the function records the target
 * path, the flags and options to use for a read-only remount, whether a read-only remount should
 * be attempted at all, whether the mount should be unmounted lazily, and whether it may be moved
 * away when busy.
 *
 * The visible error paths return the value of log_error_errno(), i.e. they log and propagate the
 * failure.
 *
 * NOTE(review): several lines of the original (error guards, the loop header, the opening of the
 * struct initializer) are missing from this listing; the description covers only what is
 * visible. */
53 int mount_points_list_get(FILE *f
, MountPoint
**head
) {
54 _cleanup_(mnt_free_tablep
) struct libmnt_table
*table
= NULL
;
55 _cleanup_(mnt_free_iterp
) struct libmnt_iter
*iter
= NULL
;
60 r
= libmount_parse_mountinfo(f
, &table
, &iter
);
62 return log_error_errno(r
, "Failed to parse /proc/self/mountinfo: %m");
65 _cleanup_free_
char *options
= NULL
, *remount_options
= NULL
;
67 const char *path
, *fstype
;
68 unsigned long remount_flags
= 0u;
69 bool try_remount_ro
, is_api_vfs
, is_network
;
70 _cleanup_free_ MountPoint
*m
= NULL
;
72 r
= mnt_table_next_fs(table
, iter
, &fs
);
76 return log_error_errno(r
, "Failed to get next entry from /proc/self/mountinfo: %m");
78 path
= mnt_fs_get_target(fs
);
82 fstype
= mnt_fs_get_fstype(fs
);
84 /* Combine the generic VFS options with the FS-specific options. Duplicates are not a problem
85 * here, because the only options that should come up twice are typically ro/rw, which are
86 * turned into MS_RDONLY or the inversion of it.
88 * Even if there are duplicates later in mount_option_mangle() they shouldn't hurt anyways as
89 * they override each other. */
90 if (!strextend_with_separator(&options
, ",", mnt_fs_get_vfs_options(fs
)))
92 if (!strextend_with_separator(&options
, ",", mnt_fs_get_fs_options(fs
)))
95 /* Ignore mount points we can't unmount because they are API or because we are keeping them
96 * open (like /dev/console). Also, ignore all mounts below API file systems, since they are
97 * likely virtual too, and hence not worth spending time on. Also, in unprivileged containers
98 * we might lack the rights to unmount these things, hence don't bother. */
99 if (mount_point_is_api(path
) ||
100 mount_point_ignore(path
) ||
101 path_below_api_vfs(path
))
104 is_network
= fstype_is_network(fstype
);
105 is_api_vfs
= fstype_is_api_vfs(fstype
);
107 /* If we are in a container, don't attempt to read-only mount anything as that brings no real
108 * benefits, but might confuse the host, as we remount the superblock here, not the bind
111 * If the filesystem is a network fs, also skip the remount. It brings no value (we cannot
112 * leave a "dirty fs") and could hang if the network is down. Note that umount2() is more
113 * careful and will not hang because of the network being down. */
114 try_remount_ro
= detect_container() <= 0 &&
117 !fstype_is_ro(fstype
) &&
118 !fstab_test_yes_no_option(options
, "ro\0rw\0");
120 if (try_remount_ro
) {
121 /* mount(2) states that mount flags and options need to be exactly the same as they
122 * were when the filesystem was mounted, except for the desired changes. So we
123 * reconstruct both here and adjust them for the later remount call too. */
125 r
= mnt_fs_get_propagation(fs
, &remount_flags
);
127 log_warning_errno(r
, "mnt_fs_get_propagation() failed for %s, ignoring: %m", path
);
131 r
= mount_option_mangle(options
, remount_flags
, &remount_flags
, &remount_options
);
133 log_warning_errno(r
, "mount_option_mangle failed for %s, ignoring: %m", path
);
137 /* MS_BIND is special. If it is provided it will only make the mount-point
138 * read-only. If left out, the super block itself is remounted, which we want. */
139 remount_flags
= (remount_flags
|MS_REMOUNT
|MS_RDONLY
) & ~MS_BIND
;
142 m
= new(MountPoint
, 1);
146 r
= libmount_is_leaf(table
, fs
);
148 return log_error_errno(r
, "Failed to get children mounts for %s from /proc/self/mountinfo: %m", path
);
152 .remount_options
= remount_options
,
153 .remount_flags
= remount_flags
,
154 .try_remount_ro
= try_remount_ro
,
156 /* Unmount sysfs/procfs/… lazily, since syncing doesn't matter there, and it's OK if
157 * something keeps an fd open to it. */
158 .umount_lazily
= is_api_vfs
,
160 /* If a mount point is not a leaf, moving it would invalidate our mount table.
161 * If a mount point is on the network and the network is down, it can hang and block
163 .umount_move_if_busy
= leaf
&& !is_network
,
166 m
->path
= strdup(path
);
170 TAKE_PTR(remount_options
);
172 LIST_PREPEND(mount_point
, *head
, TAKE_PTR(m
));
/* Returns true if 'path' is exactly "/" or "/usr", or lies at or below "/run/initramfs".
 * The unmount pass uses this to decide which mount points to leave alone. */
178 static bool nonunmountable_path(const char *path
) {
181 return PATH_IN_SET(path
, "/", "/usr") ||
182 path_startswith(path
, "/run/initramfs");
/* Logs a warning listing the processes that hold files open below the mount point 'mnt'.
 *
 * Walks the entries of /proc whose names parse as PIDs, opens each process' "fd" directory
 * and resolves every link in it. When a link target starts with 'mnt', the process' name (as
 * returned by pid_get_comm()) and its PID are appended to a comma-separated list, which is
 * logged at the end.
 *
 * Purely diagnostic: nothing is returned, and allocation failures end the scan after
 * log_oom().
 *
 * NOTE(review): several guards and 'continue' statements of the original are not part of this
 * listing; the description covers only the visible calls. */
185 static void log_umount_blockers(const char *mnt
) {
186 _cleanup_free_
char *blockers
= NULL
;
189 _cleanup_closedir_
DIR *dir
= opendir("/proc");
191 return (void) log_warning_errno(errno
, "Failed to open %s: %m", "/proc/");
193 FOREACH_DIRENT_ALL(de
, dir
, break) {
194 if (!IN_SET(de
->d_type
, DT_DIR
, DT_UNKNOWN
))
198 if (parse_pid(de
->d_name
, &pid
) < 0)
201 _cleanup_free_
char *fdp
= path_join(de
->d_name
, "fd");
203 return (void) log_oom();
205 _cleanup_closedir_
DIR *fd_dir
= xopendirat(dirfd(dir
), fdp
, 0);
207 if (errno
!= ENOENT
) /* process gone by now? */
208 log_debug_errno(errno
, "Failed to open /proc/%s/, ignoring: %m",fdp
);
212 bool culprit
= false;
213 FOREACH_DIRENT(fd_de
, fd_dir
, break) {
214 _cleanup_free_
char *open_file
= NULL
;
216 r
= readlinkat_malloc(dirfd(fd_dir
), fd_de
->d_name
, &open_file
);
218 if (r
!= -ENOENT
) /* fd closed by now */
219 log_debug_errno(r
, "Failed to read link /proc/%s/%s, ignoring: %m", fdp
, fd_de
->d_name
);
223 if (path_startswith(open_file
, mnt
)) {
232 _cleanup_free_
char *comm
= NULL
;
233 r
= pid_get_comm(pid
, &comm
);
235 if (r
!= -ESRCH
) /* process gone by now */
236 log_debug_errno(r
, "Failed to read process name of PID " PID_FMT
": %m", pid
);
/* Append the entry as "<comm>(<pid>)", separated from earlier entries by ", ". */
240 if (!strextend_with_separator(&blockers
, ", ", comm
))
241 return (void) log_oom();
243 if (!strextend(&blockers
, "(", de
->d_name
, ")"))
244 return (void) log_oom();
248 log_warning("Unmounting '%s' blocked by: %s", mnt
, blockers
);
/* Remounts the mount point 'm' read-only in a forked child process, bounded by a timeout.
 *
 * The child ("(sd-remount)") calls mount() with m->remount_flags and m->remount_options on
 * m->path and reports the result through the pipe 'pfd' via report_errno_and_exit(). The
 * parent waits for the child for up to DEFAULT_TIMEOUT_USEC; when the wait yields -EPROTO it
 * tries to read the child's error code from the pipe.
 *
 * 'last_try' selects the log level used for a failed remount (LOG_ERR if set, LOG_INFO
 * otherwise).
 *
 * NOTE(review): the branch structure around the fork and the final return statement are not
 * part of this listing — confirm the exact return contract against the complete source. */
251 static int remount_with_timeout(MountPoint
*m
, bool last_try
) {
252 _cleanup_close_pair_
int pfd
[2] = EBADF_PAIR
;
253 _cleanup_(sigkill_nowaitp
) pid_t pid
= 0;
256 BLOCK_SIGNALS(SIGCHLD
);
260 r
= pipe2(pfd
, O_CLOEXEC
|O_NONBLOCK
);
264 /* Due to the possibility of a remount operation hanging, we fork a child process and set a
265 * timeout. If the timeout lapses, the assumption is that the particular remount failed. */
266 r
= safe_fork_full("(sd-remount)",
268 pfd
, ELEMENTSOF(pfd
),
269 FORK_RESET_SIGNALS
|FORK_CLOSE_ALL_FDS
|FORK_LOG
|FORK_REOPEN_LOG
, &pid
);
/* The side that performs the mount only writes its result to pfd[1], so pfd[0] is closed here. */
273 pfd
[0] = safe_close(pfd
[0]);
275 log_info("Remounting '%s' read-only with options '%s'.", m
->path
, strempty(m
->remount_options
));
277 /* Start the mount operation here in the child */
278 r
= mount(NULL
, m
->path
, NULL
, m
->remount_flags
, m
->remount_options
);
280 log_full_errno(last_try
? LOG_ERR
: LOG_INFO
,
282 "Failed to remount '%s' read-only: %m",
285 report_errno_and_exit(pfd
[1], r
);
/* The waiting side only reads from pfd[0], so pfd[1] is closed before waiting. */
288 pfd
[1] = safe_close(pfd
[1]);
290 r
= wait_for_terminate_with_timeout(pid
, DEFAULT_TIMEOUT_USEC
);
292 log_error_errno(r
, "Remounting '%s' timed out, issuing SIGKILL to PID " PID_FMT
".", m
->path
, pid
);
293 else if (r
== -EPROTO
) {
294 /* Try to read error code from child */
295 if (read(pfd
[0], &r
, sizeof(r
)) == sizeof(r
))
296 log_debug_errno(r
, "Remounting '%s' failed abnormally, child process " PID_FMT
" failed: %m", m
->path
, pid
);
298 r
= log_debug_errno(EPROTO
, "Remounting '%s' failed abnormally, child process " PID_FMT
" aborted or exited non-zero.", m
->path
, pid
);
299 TAKE_PID(pid
); /* child exited (just not as we expected) hence don't kill anymore */
301 log_error_errno(r
, "Remounting '%s' failed unexpectedly, couldn't wait for child process " PID_FMT
": %m", m
->path
, pid
);
/* Unmounts the mount point 'm' in a forked child process, bounded by a timeout.
 *
 * The child ("(sd-umount)") calls umount2() on m->path with UMOUNT_NOFOLLOW plus either
 * MNT_DETACH (if m->umount_lazily is set) or MNT_FORCE, and reports the result through the pipe
 * 'pfd' via report_errno_and_exit(). If the unmount fails with -EBUSY and 'last_try' is set, the
 * processes blocking the unmount are logged. The parent waits for the child for up to
 * DEFAULT_TIMEOUT_USEC; when the wait yields -EPROTO it tries to read the child's error code
 * from the pipe.
 *
 * 'last_try' also selects the log level used for a failed unmount (LOG_ERR if set, LOG_INFO
 * otherwise).
 *
 * NOTE(review): the branch structure around the fork and the final return statement are not
 * part of this listing — confirm the exact return contract against the complete source. */
306 static int umount_with_timeout(MountPoint
*m
, bool last_try
) {
307 _cleanup_close_pair_
int pfd
[2] = EBADF_PAIR
;
308 _cleanup_(sigkill_nowaitp
) pid_t pid
= 0;
311 BLOCK_SIGNALS(SIGCHLD
);
315 r
= pipe2(pfd
, O_CLOEXEC
|O_NONBLOCK
);
319 /* Due to the possibility of a umount operation hanging, we fork a child process and set a
320 * timeout. If the timeout lapses, the assumption is that the particular umount failed. */
321 r
= safe_fork_full("(sd-umount)",
323 pfd
, ELEMENTSOF(pfd
),
324 FORK_RESET_SIGNALS
|FORK_CLOSE_ALL_FDS
|FORK_LOG
|FORK_REOPEN_LOG
, &pid
);
/* The side that performs the unmount only writes its result to pfd[1], so pfd[0] is closed here. */
328 pfd
[0] = safe_close(pfd
[0]);
330 log_info("Unmounting '%s'.", m
->path
);
332 /* Start the umount operation here in the child. Using MNT_FORCE causes some filesystems
333 * (e.g. FUSE and NFS and other network filesystems) to abort any pending requests and return
334 * -EIO rather than blocking indefinitely. If the filesystem is "busy", this may allow
335 * processes to die, thus making the filesystem less busy so the unmount might succeed
336 * (rather than return EBUSY). */
337 r
= RET_NERRNO(umount2(m
->path
,
338 UMOUNT_NOFOLLOW
| /* Don't follow symlinks: this should never happen unless our mount list was wrong */
339 (m
->umount_lazily
? MNT_DETACH
: MNT_FORCE
)));
341 log_full_errno(last_try
? LOG_ERR
: LOG_INFO
, r
, "Failed to unmount %s: %m", m
->path
);
343 if (r
== -EBUSY
&& last_try
)
344 log_umount_blockers(m
->path
);
347 report_errno_and_exit(pfd
[1], r
);
/* The waiting side only reads from pfd[0], so pfd[1] is closed before waiting. */
350 pfd
[1] = safe_close(pfd
[1]);
352 r
= wait_for_terminate_with_timeout(pid
, DEFAULT_TIMEOUT_USEC
);
354 log_error_errno(r
, "Unmounting '%s' timed out, issuing SIGKILL to PID " PID_FMT
".", m
->path
, pid
);
355 else if (r
== -EPROTO
) {
356 /* Try to read error code from child */
357 if (read(pfd
[0], &r
, sizeof(r
)) == sizeof(r
))
358 log_debug_errno(r
, "Unmounting '%s' failed abnormally, child process " PID_FMT
" failed: %m", m
->path
, pid
);
360 r
= log_debug_errno(EPROTO
, "Unmounting '%s' failed abnormally, child process " PID_FMT
" aborted or exited non-zero.", m
->path
, pid
);
361 TAKE_PID(pid
); /* It died, but abnormally, no purpose in killing */
363 log_error_errno(r
, "Unmounting '%s' failed unexpectedly, couldn't wait for child process " PID_FMT
": %m", m
->path
, pid
);
/* Performs one pass over the mount point list rooted at '*head'.
 *
 * For each entry the visible steps are, in order:
 *   1. if try_remount_ro is set, remount it read-only via remount_with_timeout();
 *   2. check nonunmountable_path() ("/", "/usr", "/run/initramfs");
 *   3. unmount it via umount_with_timeout();
 *   4. if that failed with -EBUSY and umount_move_if_busy is set, move the mount with MS_MOVE
 *      to a randomly named target below /run/shutdown/mounts (a directory or a file, depending
 *      on is_dir()), unless it already resides directly in that directory.
 *
 * 'last_try' selects the log level for failures (LOG_ERR if set, LOG_INFO otherwise).
 *
 * NOTE(review): the statements that update '*changed', the 'continue' statements and the return
 * value are not part of this listing — confirm against the complete source. */
368 /* This includes remounting readonly, which changes the kernel mount options. Therefore the list passed to
369 * this function is invalidated, and should not be reused. */
370 static int mount_points_list_umount(MountPoint
**head
, bool *changed
, bool last_try
) {
372 _cleanup_free_
char *resolved_mounts_path
= NULL
;
377 LIST_FOREACH(mount_point
, m
, *head
) {
378 if (m
->try_remount_ro
) {
379 /* We always try to remount directories read-only first, before we go on and umount
382 * Mount points can be stacked. If a mount point is stacked below / or /usr, we
383 * cannot umount or remount it directly, since there is no way to refer to the
384 * underlying mount. There's nothing we can do about it for the general case, but we
385 * can do something about it if it is aliased somewhere else via a bind mount. If we
386 * explicitly remount the super block of that alias read-only we hence should be
387 * relatively safe regarding keeping a dirty fs we cannot otherwise see.
389 * Since the remount can hang in the instance of remote filesystems, we remount
390 * asynchronously and skip the subsequent umount if it fails. */
391 if (remount_with_timeout(m
, last_try
) < 0) {
392 /* Remount failed, but try unmounting anyway,
393 * unless this is a mount point we want to skip. */
394 if (nonunmountable_path(m
->path
)) {
401 /* Skip / and /usr since we cannot unmount that anyway, since we are running from it. They
402 * have already been remounted ro. */
403 if (nonunmountable_path(m
->path
))
406 /* Trying to umount */
407 r
= umount_with_timeout(m
, last_try
);
413 /* If a mount is busy, we move it to not keep parent mount points busy.
414 * More moving will occur in next iteration with a fresh mount table.
416 if (r
!= -EBUSY
|| !m
->umount_move_if_busy
)
419 _cleanup_free_
char *dirname
= NULL
;
421 r
= path_extract_directory(m
->path
, &dirname
);
424 log_full_errno(last_try
? LOG_ERR
: LOG_INFO
, r
, "Cannot find directory for %s: %m", m
->path
);
428 /* We need to canonicalize /run/shutdown/mounts. We cannot compare inodes, since /run
429 * might be bind mounted somewhere we want to unmount. And we need to move all mounts in
430 * /run/shutdown/mounts from there.
432 if (!resolved_mounts_path
)
433 (void) chase("/run/shutdown/mounts", NULL
, 0, &resolved_mounts_path
, NULL
);
434 if (!path_equal(dirname
, resolved_mounts_path
)) {
435 char newpath
[STRLEN("/run/shutdown/mounts/") + 16 + 1];
437 xsprintf(newpath
, "/run/shutdown/mounts/%016" PRIx64
, random_u64());
439 /* on error of is_dir, assume directory */
440 if (is_dir(m
->path
, true) != 0) {
441 r
= mkdir_p(newpath
, 0000);
443 log_full_errno(last_try
? LOG_ERR
: LOG_INFO
, r
, "Could not create directory %s: %m", newpath
);
447 r
= touch_file(newpath
, /* parents= */ true, USEC_INFINITY
, UID_INVALID
, GID_INVALID
, 0700);
449 log_full_errno(last_try
? LOG_ERR
: LOG_INFO
, r
, "Could not create file %s: %m", newpath
);
454 log_info("Moving mount %s to %s.", m
->path
, newpath
);
456 r
= RET_NERRNO(mount(m
->path
, newpath
, NULL
, MS_MOVE
, NULL
));
459 log_full_errno(last_try
? LOG_ERR
: LOG_INFO
, r
, "Could not move %s to %s: %m", m
->path
, newpath
);
/* Runs a single unmount pass: builds a fresh mount point list with mount_points_list_get()
 * (passing NULL as the stream argument) and hands it to mount_points_list_umount(), whose result
 * is returned. The list is released automatically through the mount_points_list_free cleanup
 * handler attached to the list head.
 *
 * 'changed' and 'last_try' are forwarded unchanged to mount_points_list_umount().
 *
 * NOTE(review): the handling of a failing mount_points_list_get() is not part of this
 * listing. */
468 static int umount_all_once(bool *changed
, bool last_try
) {
469 _cleanup_(mount_points_list_free
) LIST_HEAD(MountPoint
, mp_list_head
);
474 LIST_HEAD_INIT(mp_list_head
);
475 r
= mount_points_list_get(NULL
, &mp_list_head
);
479 return mount_points_list_umount(&mp_list_head
, changed
, last_try
);
/* Repeats umount_all_once() for as long as a pass reports, through 'umount_changed', that it
 * changed something. 'last_try' is forwarded to every pass.
 *
 * NOTE(review): how '*changed' is updated and what is returned is not part of this listing —
 * confirm against the complete source. */
482 int umount_all(bool *changed
, bool last_try
) {
488 /* Retry umount, until nothing can be umounted anymore. Mounts are processed in order, newest
489 * first. The retries are needed when an old mount has been moved, to a path inside a newer mount. */
491 umount_changed
= false;
493 r
= umount_all_once(&umount_changed
, last_try
);
496 } while (umount_changed
);