]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
41669317 | 2 | |
41669317 | 3 | #include <errno.h> |
07630cea | 4 | #include <fcntl.h> |
a8fbdf54 | 5 | #include <limits.h> |
07630cea | 6 | #include <stdbool.h> |
41669317 | 7 | #include <sys/mount.h> |
07630cea | 8 | #include <sys/stat.h> |
41669317 | 9 | #include <unistd.h> |
41669317 | 10 | |
971ff8c7 | 11 | #include "base-filesystem.h" |
f461a28d | 12 | #include "chase.h" |
7c764d45 | 13 | #include "creds-util.h" |
3ffd4af2 | 14 | #include "fd-util.h" |
baa6a42d | 15 | #include "initrd-util.h" |
a8fbdf54 | 16 | #include "log.h" |
f5947a5e | 17 | #include "missing_syscall.h" |
35cd0ba5 | 18 | #include "mkdir-label.h" |
e5b42203 | 19 | #include "mount-util.h" |
049af8ad | 20 | #include "mountpoint-util.h" |
07630cea LP |
21 | #include "path-util.h" |
22 | #include "rm-rf.h" | |
d054f0a4 | 23 | #include "stdio-util.h" |
07630cea | 24 | #include "string-util.h" |
e5b42203 | 25 | #include "strv.h" |
c6878637 | 26 | #include "switch-root.h" |
ee104e11 | 27 | #include "user-util.h" |
41669317 | 28 | |
e5b42203 | 29 | int switch_root(const char *new_root, |
f2c1d491 | 30 | const char *old_root_after, /* path below the new root, where to place the old root after the transition; may be NULL to unmount it */ |
2932161b | 31 | SwitchRootFlags flags) { |
41669317 | 32 | |
c2d62118 LP |
33 | /* Stuff mounted below /run/ we don't save on soft reboot, as it might have lost its relevance, e.g. |
34 | * credentials, removable media and such; we rather want the new boot to mount this fresh. But on | |
35 | * the switch from initrd we do use MS_REC, as it is expected that mounts set up in /run/ are | |
36 | * maintained. */ | |
37 | static const struct { | |
7c764d45 | 38 | const char *path; |
c2d62118 LP |
39 | unsigned long mount_flags; /* Flags to apply if SWITCH_ROOT_RECURSIVE_RUN is unset */ |
40 | unsigned long mount_flags_recursive_run; /* Flags to apply if SWITCH_ROOT_RECURSIVE_RUN is set (0 if shall be skipped) */ | |
7c764d45 | 41 | } transfer_table[] = { |
c2d62118 LP |
42 | { "/dev", MS_BIND|MS_REC, MS_BIND|MS_REC }, /* Recursive, because we want to save the original /dev/shm/ + /dev/pts/ and similar */ |
43 | { "/sys", MS_BIND|MS_REC, MS_BIND|MS_REC }, /* Similar, we want to retain various API VFS, or the cgroupv1 /sys/fs/cgroup/ tree */ | |
44 | { "/proc", MS_BIND|MS_REC, MS_BIND|MS_REC }, /* Similar */ | |
45 | { "/run", MS_BIND, MS_BIND|MS_REC }, /* Recursive except on soft reboot, see above */ | |
46 | { SYSTEM_CREDENTIALS_DIRECTORY, MS_BIND, 0 /* skip! */ }, /* Credentials passed into the system should survive */ | |
47 | { ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY, MS_BIND, 0 /* skip! */ }, /* Similar */ | |
48 | { "/run/host", MS_BIND|MS_REC, 0 /* skip! */ }, /* Host supplied hierarchy should also survive */ | |
7c764d45 LP |
49 | }; |
50 | ||
f2c1d491 | 51 | _cleanup_close_ int old_root_fd = -EBADF, new_root_fd = -EBADF; |
e5b42203 | 52 | _cleanup_free_ char *resolved_old_root_after = NULL; |
f2c1d491 | 53 | int r, istmp; |
e5b42203 LP |
54 | |
55 | assert(new_root); | |
41669317 | 56 | |
e5b42203 | 57 | /* Open the old root: needed to compare it with the new root and, if requested, to remove its contents later */ |
f2c1d491 | 58 | old_root_fd = open("/", O_DIRECTORY|O_CLOEXEC); |
a940f507 ZJS |
59 | if (old_root_fd < 0) |
60 | return log_error_errno(errno, "Failed to open root directory: %m"); | |
f2c1d491 | 61 | |
5268188d LP |
62 | new_root_fd = open(new_root, O_DIRECTORY|O_CLOEXEC); |
63 | if (new_root_fd < 0) | |
64 | return log_error_errno(errno, "Failed to open target directory '%s': %m", new_root); | |
65 | ||
563e6846 | 66 | r = inode_same_at(old_root_fd, "", new_root_fd, "", AT_EMPTY_PATH); |
5268188d LP |
67 | if (r < 0) |
68 | return log_error_errno(r, "Failed to determine if old and new root directory are the same: %m"); | |
69 | if (r > 0) { | |
70 | log_debug("Skipping switch root, as old and new root directory are the same."); | |
71 | return 0; | |
72 | } | |
73 | ||
f717d7a4 LP |
74 | /* Make the new root directory a mount point if it isn't */ |
75 | r = fd_make_mount_point(new_root_fd); | |
76 | if (r < 0) | |
77 | return log_error_errno(r, "Failed to make new root directory a mount point: %m"); | |
21596626 YW |
78 | if (r > 0) { |
79 | int fd; | |
80 | ||
81 | /* When the path was not a mount point, then we need to reopen the path, otherwise, it still | |
82 | * points to the underlying directory. */ | |
83 | ||
84 | fd = open(new_root, O_DIRECTORY|O_CLOEXEC); | |
85 | if (fd < 0) | |
86 | return log_error_errno(errno, "Failed to reopen target directory '%s': %m", new_root); | |
87 | ||
88 | close_and_replace(new_root_fd, fd); | |
89 | } | |
f717d7a4 | 90 | |
2932161b LP |
91 | if (FLAGS_SET(flags, SWITCH_ROOT_DESTROY_OLD_ROOT)) { |
92 | istmp = fd_is_temporary_fs(old_root_fd); | |
93 | if (istmp < 0) | |
94 | return log_error_errno(istmp, "Failed to stat root directory: %m"); | |
95 | if (istmp > 0) | |
96 | log_debug("Root directory is on tmpfs, will do cleanup later."); | |
97 | } else | |
98 | istmp = -1; /* don't know */ | |
f2c1d491 | 99 | |
f2c1d491 LP |
100 | if (old_root_after) { |
101 | /* Determine where we shall place the old root after the transition */ | |
102 | r = chase(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after, NULL); | |
103 | if (r < 0) | |
104 | return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, old_root_after); | |
105 | if (r == 0) /* Doesn't exist yet. Let's create it */ | |
106 | (void) mkdir_p_label(resolved_old_root_after, 0755); | |
107 | } | |
108 | ||
5221b55b LP |
109 | /* We are about to unmount various file systems with MNT_DETACH (either explicitly via umount() or |
110 | * indirectly via pivot_root()), and thus do not synchronously wait for them to be fully sync'ed — | |
111 | * all while making them invisible/inaccessible in the file system tree for later code. That makes | |
112 | * sync'ing them then difficult. Let's hence issue a manual sync() here, so that we at least can | |
113 | * guarantee all file systems are in a good state before entering this state. */ | |
1795252c LP |
114 | if (!FLAGS_SET(flags, SWITCH_ROOT_DONT_SYNC)) |
115 | sync(); | |
5221b55b | 116 | |
f2c1d491 LP |
117 | /* Work-around for kernel design: the kernel refuses MS_MOVE if any file systems are mounted |
118 | * MS_SHARED. Hence remount them MS_PRIVATE here as a work-around. | |
f47fc355 LP |
119 | * |
120 | * https://bugzilla.redhat.com/show_bug.cgi?id=847418 */ | |
121 | if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0) | |
e5b42203 LP |
122 | return log_error_errno(errno, "Failed to set \"/\" mount propagation to private: %m"); |
123 | ||
e5b42203 LP |
124 | /* Do not fail if base_filesystem_create_fd() fails. Not all switch roots look the way that function wants |
125 | * them to look. They might even boot, if they are RO and don't have the FS layout. Just ignore the error | |
126 | * and switch_root() nevertheless. */ | |
8aefedce | 127 | (void) base_filesystem_create_fd(new_root_fd, new_root, UID_INVALID, GID_INVALID); |
971ff8c7 | 128 | |
7c764d45 LP |
129 | FOREACH_ARRAY(transfer, transfer_table, ELEMENTSOF(transfer_table)) { |
130 | _cleanup_free_ char *chased = NULL; | |
c2d62118 | 131 | unsigned long mount_flags; |
7c764d45 | 132 | |
c2d62118 LP |
133 | mount_flags = FLAGS_SET(flags, SWITCH_ROOT_RECURSIVE_RUN) ? transfer->mount_flags_recursive_run : transfer->mount_flags; |
134 | if (mount_flags == 0) /* skip if zero */ | |
b12d41a8 LB |
135 | continue; |
136 | ||
7c764d45 LP |
137 | if (access(transfer->path, F_OK) < 0) { |
138 | log_debug_errno(errno, "Path '%s' to move to target root directory, not found, ignoring: %m", transfer->path); | |
139 | continue; | |
140 | } | |
141 | ||
142 | r = chase(transfer->path, new_root, CHASE_PREFIX_ROOT, &chased, NULL); | |
143 | if (r < 0) | |
144 | return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, transfer->path); | |
145 | ||
146 | /* Let's see if it is a mount point already. */ | |
147 | r = path_is_mount_point(chased, NULL, 0); | |
148 | if (r < 0) | |
149 | return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", chased); | |
150 | if (r > 0) /* If it is already mounted, then do nothing */ | |
151 | continue; | |
152 | ||
c2d62118 | 153 | r = mount_nofollow_verbose(LOG_ERR, transfer->path, chased, NULL, mount_flags, NULL); |
7c764d45 LP |
154 | if (r < 0) |
155 | return r; | |
156 | } | |
157 | ||
f2c1d491 | 158 | if (fchdir(new_root_fd) < 0) |
4a62c710 | 159 | return log_error_errno(errno, "Failed to change directory to %s: %m", new_root); |
41669317 | 160 | |
e5b42203 LP |
161 | /* We first try a pivot_root() so that we can umount the old root dir. In many cases (i.e. where rootfs is /), |
162 | * that's not possible however, and hence we simply overmount root */ | |
f2c1d491 LP |
163 | if (resolved_old_root_after) |
164 | r = RET_NERRNO(pivot_root(".", resolved_old_root_after)); | |
165 | else { | |
166 | r = RET_NERRNO(pivot_root(".", ".")); | |
167 | if (r >= 0) { | |
168 | /* Now unmount the upper of the two stacked file systems */ | |
169 | if (umount2(".", MNT_DETACH) < 0) | |
170 | return log_error_errno(errno, "Failed to unmount the old root: %m"); | |
e5b42203 | 171 | } |
f2c1d491 LP |
172 | } |
173 | if (r < 0) { | |
174 | log_debug_errno(r, "Pivoting root file system failed, moving mounts instead: %m"); | |
891a4918 | 175 | |
95648f9e LP |
176 | if (resolved_old_root_after) { |
177 | r = mount_nofollow_verbose(LOG_ERR, "/", resolved_old_root_after, NULL, MS_BIND|MS_REC, NULL); | |
178 | if (r < 0) | |
179 | return r; | |
180 | } | |
181 | ||
268d1244 LP |
182 | /* If we have to use MS_MOVE let's first try to get rid of *all* mounts we can, with the |
183 | * exception of the path we want to switch to, plus everything leading to it and within | |
184 | * it. This is necessary because unlike pivot_root() just moving the mount to the root via | |
185 | * MS_MOVE won't magically unmount anything below it. Once the chroot() succeeds the mounts | |
186 | * below would still be around but invisible to us, because not accessible via | |
187 | * /proc/self/mountinfo. Hence, let's clean everything up first, as long as we still can. */ | |
6b219b74 YW |
188 | if (!FLAGS_SET(flags, SWITCH_ROOT_SKIP_RECURSIVE_UMOUNT)) |
189 | (void) umount_recursive_full(NULL, MNT_DETACH, STRV_MAKE(new_root)); | |
268d1244 | 190 | |
f2c1d491 LP |
191 | if (mount(".", "/", NULL, MS_MOVE, NULL) < 0) |
192 | return log_error_errno(errno, "Failed to move %s to /: %m", new_root); | |
41669317 | 193 | |
f2c1d491 LP |
194 | if (chroot(".") < 0) |
195 | return log_error_errno(errno, "Failed to change root: %m"); | |
41669317 | 196 | |
f2c1d491 LP |
197 | if (chdir(".") < 0) |
198 | return log_error_errno(errno, "Failed to change directory: %m"); | |
199 | } | |
ac0930c8 | 200 | |
2932161b | 201 | if (istmp > 0) { |
41669317 LP |
202 | struct stat rb; |
203 | ||
204 | if (fstat(old_root_fd, &rb) < 0) | |
a940f507 | 205 | return log_error_errno(errno, "Failed to stat old root directory: %m"); |
f2c1d491 | 206 | |
84b4c785 LP |
207 | /* Note: the below won't operate on non-memory file systems (i.e. only on tmpfs, ramfs), and |
208 | * it will stop at mount boundaries */ | |
a940f507 | 209 | (void) rm_rf_children(TAKE_FD(old_root_fd), 0, &rb); /* takes possession of the dir fd, even on failure */ |
41669317 LP |
210 | } |
211 | ||
03e334a1 | 212 | return 0; |
41669317 | 213 | } |