]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/switch-root.c
update NEWS with even more features for v258
[thirdparty/systemd.git] / src / shared / switch-root.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
41669317 2
07630cea 3#include <fcntl.h>
41669317 4#include <sys/mount.h>
07630cea 5#include <sys/stat.h>
41669317 6#include <unistd.h>
41669317 7
b78d73fa 8#include "alloc-util.h"
971ff8c7 9#include "base-filesystem.h"
f461a28d 10#include "chase.h"
69a283c5 11#include "errno-util.h"
3ffd4af2 12#include "fd-util.h"
a8fbdf54 13#include "log.h"
f5947a5e 14#include "missing_syscall.h"
35cd0ba5 15#include "mkdir-label.h"
e5b42203 16#include "mount-util.h"
049af8ad 17#include "mountpoint-util.h"
07630cea 18#include "rm-rf.h"
69a283c5 19#include "stat-util.h"
c6878637 20#include "switch-root.h"
41669317 21
e5b42203 22int switch_root(const char *new_root,
f2c1d491 23 const char *old_root_after, /* path below the new root, where to place the old root after the transition; may be NULL to unmount it */
2932161b 24 SwitchRootFlags flags) {
41669317 25
4eec099d
MY
26 /* Stuff mounted below /run/ we don't save on soft reboot, as it might have lost its relevance,
27 * e.g. removable media and such. We rather want that the new boot mounts this fresh. But on
c2d62118
LP
28 * the switch from initrd we do use MS_REC, as it is expected that mounts set up in /run/ are
29 * maintained. */
30 static const struct {
7c764d45 31 const char *path;
c2d62118
LP
32 unsigned long mount_flags; /* Flags to apply if SWITCH_ROOT_RECURSIVE_RUN is unset */
33 unsigned long mount_flags_recursive_run; /* Flags to apply if SWITCH_ROOT_RECURSIVE_RUN is set (0 if shall be skipped) */
7c764d45 34 } transfer_table[] = {
07c5c2ab
MY
35 { "/dev", MS_BIND|MS_REC, MS_BIND|MS_REC }, /* Recursive, because we want to save the original /dev/shm/ + /dev/pts/ and similar */
36 { "/sys", MS_BIND|MS_REC, MS_BIND|MS_REC }, /* Similar, we want to retain various API VFS, or the cgroupv1 /sys/fs/cgroup/ tree */
37 { "/proc", MS_BIND|MS_REC, MS_BIND|MS_REC }, /* Similar */
38 { "/run", MS_BIND, MS_BIND|MS_REC }, /* Recursive except on soft reboot, see above */
39 { "/run/credentials", MS_BIND|MS_REC, 0 /* skip! */ }, /* Credential mounts should survive */
40 { "/run/host", MS_BIND|MS_REC, 0 /* skip! */ }, /* Host supplied hierarchy should also survive */
7c764d45
LP
41 };
42
f2c1d491 43 _cleanup_close_ int old_root_fd = -EBADF, new_root_fd = -EBADF;
e5b42203 44 _cleanup_free_ char *resolved_old_root_after = NULL;
f2c1d491 45 int r, istmp;
e5b42203
LP
46
47 assert(new_root);
41669317 48
e5b42203 49 /* Check if we shall remove the contents of the old root */
f2c1d491 50 old_root_fd = open("/", O_DIRECTORY|O_CLOEXEC);
a940f507
ZJS
51 if (old_root_fd < 0)
52 return log_error_errno(errno, "Failed to open root directory: %m");
f2c1d491 53
e51d8e0a 54 new_root_fd = open(new_root, O_PATH|O_DIRECTORY|O_CLOEXEC);
5268188d
LP
55 if (new_root_fd < 0)
56 return log_error_errno(errno, "Failed to open target directory '%s': %m", new_root);
57
f83a74df 58 r = fds_are_same_mount(old_root_fd, new_root_fd);
5268188d 59 if (r < 0)
f83a74df 60 return log_error_errno(r, "Failed to check if old and new root directory/mount are the same: %m");
5268188d 61 if (r > 0) {
f83a74df 62 log_debug("Skipping switch root, as old and new root directories/mounts are the same.");
5268188d
LP
63 return 0;
64 }
65
f717d7a4
LP
66 /* Make the new root directory a mount point if it isn't */
67 r = fd_make_mount_point(new_root_fd);
68 if (r < 0)
69 return log_error_errno(r, "Failed to make new root directory a mount point: %m");
21596626
YW
70 if (r > 0) {
71 int fd;
72
73 /* When the path was not a mount point, then we need to reopen the path, otherwise, it still
74 * points to the underlying directory. */
75
76 fd = open(new_root, O_DIRECTORY|O_CLOEXEC);
77 if (fd < 0)
78 return log_error_errno(errno, "Failed to reopen target directory '%s': %m", new_root);
79
80 close_and_replace(new_root_fd, fd);
81 }
f717d7a4 82
2932161b
LP
83 if (FLAGS_SET(flags, SWITCH_ROOT_DESTROY_OLD_ROOT)) {
84 istmp = fd_is_temporary_fs(old_root_fd);
85 if (istmp < 0)
86 return log_error_errno(istmp, "Failed to stat root directory: %m");
87 if (istmp > 0)
88 log_debug("Root directory is on tmpfs, will do cleanup later.");
89 } else
90 istmp = -1; /* don't know */
f2c1d491 91
f2c1d491
LP
92 if (old_root_after) {
93 /* Determine where we shall place the old root after the transition */
94 r = chase(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after, NULL);
95 if (r < 0)
96 return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, old_root_after);
97 if (r == 0) /* Doesn't exist yet. Let's create it */
98 (void) mkdir_p_label(resolved_old_root_after, 0755);
99 }
100
5221b55b
LP
101 /* We are about to unmount various file systems with MNT_DETACH (either explicitly via umount() or
102 * indirectly via pivot_root()), and thus do not synchronously wait for them to be fully sync'ed —
103 * all while making them invisible/inaccessible in the file system tree for later code. That makes
104 * sync'ing them then difficult. Let's hence issue a manual sync() here, so that we at least can
105 * guarantee all file systems are an a good state before entering this state. */
1795252c
LP
106 if (!FLAGS_SET(flags, SWITCH_ROOT_DONT_SYNC))
107 sync();
5221b55b 108
f2c1d491
LP
109 /* Work-around for kernel design: the kernel refuses MS_MOVE if any file systems are mounted
110 * MS_SHARED. Hence remount them MS_PRIVATE here as a work-around.
f47fc355
LP
111 *
112 * https://bugzilla.redhat.com/show_bug.cgi?id=847418 */
113 if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0)
e5b42203
LP
114 return log_error_errno(errno, "Failed to set \"/\" mount propagation to private: %m");
115
e5b42203
LP
116 /* Do not fail if base_filesystem_create() fails. Not all switch roots are like base_filesystem_create() wants
117 * them to look like. They might even boot, if they are RO and don't have the FS layout. Just ignore the error
118 * and switch_root() nevertheless. */
8aefedce 119 (void) base_filesystem_create_fd(new_root_fd, new_root, UID_INVALID, GID_INVALID);
971ff8c7 120
85471164 121 FOREACH_ELEMENT(transfer, transfer_table) {
7c764d45 122 _cleanup_free_ char *chased = NULL;
c2d62118 123 unsigned long mount_flags;
7c764d45 124
c2d62118
LP
125 mount_flags = FLAGS_SET(flags, SWITCH_ROOT_RECURSIVE_RUN) ? transfer->mount_flags_recursive_run : transfer->mount_flags;
126 if (mount_flags == 0) /* skip if zero */
b12d41a8
LB
127 continue;
128
7c764d45
LP
129 if (access(transfer->path, F_OK) < 0) {
130 log_debug_errno(errno, "Path '%s' to move to target root directory, not found, ignoring: %m", transfer->path);
131 continue;
132 }
133
134 r = chase(transfer->path, new_root, CHASE_PREFIX_ROOT, &chased, NULL);
135 if (r < 0)
136 return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, transfer->path);
137
138 /* Let's see if it is a mount point already. */
b409aacb 139 r = path_is_mount_point(chased);
7c764d45
LP
140 if (r < 0)
141 return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", chased);
142 if (r > 0) /* If it is already mounted, then do nothing */
143 continue;
144
c2d62118 145 r = mount_nofollow_verbose(LOG_ERR, transfer->path, chased, NULL, mount_flags, NULL);
7c764d45
LP
146 if (r < 0)
147 return r;
148 }
149
f2c1d491 150 if (fchdir(new_root_fd) < 0)
4a62c710 151 return log_error_errno(errno, "Failed to change directory to %s: %m", new_root);
41669317 152
e5b42203
LP
153 /* We first try a pivot_root() so that we can umount the old root dir. In many cases (i.e. where rootfs is /),
154 * that's not possible however, and hence we simply overmount root */
f2c1d491
LP
155 if (resolved_old_root_after)
156 r = RET_NERRNO(pivot_root(".", resolved_old_root_after));
157 else {
158 r = RET_NERRNO(pivot_root(".", "."));
159 if (r >= 0) {
160 /* Now unmount the upper of the two stacked file systems */
161 if (umount2(".", MNT_DETACH) < 0)
162 return log_error_errno(errno, "Failed to unmount the old root: %m");
e5b42203 163 }
f2c1d491
LP
164 }
165 if (r < 0) {
166 log_debug_errno(r, "Pivoting root file system failed, moving mounts instead: %m");
891a4918 167
95648f9e
LP
168 if (resolved_old_root_after) {
169 r = mount_nofollow_verbose(LOG_ERR, "/", resolved_old_root_after, NULL, MS_BIND|MS_REC, NULL);
170 if (r < 0)
171 return r;
172 }
173
268d1244
LP
174 /* If we have to use MS_MOVE let's first try to get rid of *all* mounts we can, with the
175 * exception of the path we want to switch to, plus everything leading to it and within
176 * it. This is necessary because unlike pivot_root() just moving the mount to the root via
177 * MS_MOVE won't magically unmount anything below it. Once the chroot() succeeds the mounts
178 * below would still be around but invisible to us, because not accessible via
179 * /proc/self/mountinfo. Hence, let's clean everything up first, as long as we still can. */
c6c5d20d 180 (void) umount_recursive_full(NULL, MNT_DETACH, STRV_MAKE(new_root));
268d1244 181
f2c1d491
LP
182 if (mount(".", "/", NULL, MS_MOVE, NULL) < 0)
183 return log_error_errno(errno, "Failed to move %s to /: %m", new_root);
41669317 184
f2c1d491
LP
185 if (chroot(".") < 0)
186 return log_error_errno(errno, "Failed to change root: %m");
41669317 187
f2c1d491
LP
188 if (chdir(".") < 0)
189 return log_error_errno(errno, "Failed to change directory: %m");
190 }
ac0930c8 191
2932161b 192 if (istmp > 0) {
41669317
LP
193 struct stat rb;
194
195 if (fstat(old_root_fd, &rb) < 0)
a940f507 196 return log_error_errno(errno, "Failed to stat old root directory: %m");
f2c1d491 197
84b4c785
LP
198 /* Note: the below won't operate on non-memory file systems (i.e. only on tmpfs, ramfs), and
199 * it will stop at mount boundaries */
a940f507 200 (void) rm_rf_children(TAKE_FD(old_root_fd), 0, &rb); /* takes possession of the dir fd, even on failure */
41669317
LP
201 }
202
03e334a1 203 return 0;
41669317 204}