]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/switch-root.c
Merge pull request #27621 from esposem/ukify_read
[thirdparty/systemd.git] / src / shared / switch-root.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <limits.h>
6 #include <stdbool.h>
7 #include <sys/mount.h>
8 #include <sys/stat.h>
9 #include <unistd.h>
10
11 #include "base-filesystem.h"
12 #include "chase.h"
13 #include "creds-util.h"
14 #include "fd-util.h"
15 #include "initrd-util.h"
16 #include "log.h"
17 #include "missing_syscall.h"
18 #include "mkdir-label.h"
19 #include "mount-util.h"
20 #include "mountpoint-util.h"
21 #include "path-util.h"
22 #include "rm-rf.h"
23 #include "stdio-util.h"
24 #include "string-util.h"
25 #include "strv.h"
26 #include "switch-root.h"
27 #include "user-util.h"
28
29 int switch_root(const char *new_root,
30 const char *old_root_after, /* path below the new root, where to place the old root after the transition; may be NULL to unmount it */
31 SwitchRootFlags flags) {
32
33 /* Stuff mounted below /run we don't save on soft reboot, as it might have lost its relevance, i.e.
34 * credentials, removable media and such, we rather want that the new boot mounts this fresh.
35 * But on the switch from initrd we do use MS_REC, as it is expected that mounts set up in /run
36 * are maintained. */
37 unsigned long run_mount_flags = MS_BIND|(!FLAGS_SET(flags, SWITCH_ROOT_SKIP_RECURSIVE_RUN) ? MS_REC : 0);
38 struct {
39 const char *path;
40 unsigned long mount_flags;
41 bool skip_if_run_is_rec; /* For child mounts of /run, if it's moved recursively no need to handle */
42 } transfer_table[] = {
43 { "/dev", MS_BIND|MS_REC, false }, /* Recursive, because we want to save the original /dev/shm + /dev/pts and similar */
44 { "/sys", MS_BIND|MS_REC, false }, /* Similar, we want to retain various API VFS, or the cgroupv1 /sys/fs/cgroup/ tree */
45 { "/proc", MS_BIND|MS_REC, false }, /* Similar */
46 { "/run", run_mount_flags, false }, /* Recursive except on soft reboot, see above */
47 { SYSTEM_CREDENTIALS_DIRECTORY, MS_BIND, true }, /* Credentials passed into the system should survive */
48 { ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY, MS_BIND, true }, /* Similar */
49 { "/run/host", MS_BIND|MS_REC, true }, /* Host supplied hierarchy should also survive */
50 };
51
52 _cleanup_close_ int old_root_fd = -EBADF, new_root_fd = -EBADF;
53 _cleanup_free_ char *resolved_old_root_after = NULL;
54 int r, istmp;
55
56 assert(new_root);
57
58 /* Check if we shall remove the contents of the old root */
59 old_root_fd = open("/", O_DIRECTORY|O_CLOEXEC);
60 if (old_root_fd < 0)
61 return log_error_errno(errno, "Failed to open root directory: %m");
62
63 new_root_fd = open(new_root, O_DIRECTORY|O_CLOEXEC);
64 if (new_root_fd < 0)
65 return log_error_errno(errno, "Failed to open target directory '%s': %m", new_root);
66
67 r = inode_same_at(old_root_fd, "", new_root_fd, "", AT_EMPTY_PATH);
68 if (r < 0)
69 return log_error_errno(r, "Failed to determine if old and new root directory are the same: %m");
70 if (r > 0) {
71 log_debug("Skipping switch root, as old and new root directory are the same.");
72 return 0;
73 }
74
75 /* Make the new root directory a mount point if it isn't */
76 r = fd_make_mount_point(new_root_fd);
77 if (r < 0)
78 return log_error_errno(r, "Failed to make new root directory a mount point: %m");
79
80 if (FLAGS_SET(flags, SWITCH_ROOT_DESTROY_OLD_ROOT)) {
81 istmp = fd_is_temporary_fs(old_root_fd);
82 if (istmp < 0)
83 return log_error_errno(istmp, "Failed to stat root directory: %m");
84 if (istmp > 0)
85 log_debug("Root directory is on tmpfs, will do cleanup later.");
86 } else
87 istmp = -1; /* don't know */
88
89 if (old_root_after) {
90 /* Determine where we shall place the old root after the transition */
91 r = chase(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after, NULL);
92 if (r < 0)
93 return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, old_root_after);
94 if (r == 0) /* Doesn't exist yet. Let's create it */
95 (void) mkdir_p_label(resolved_old_root_after, 0755);
96 }
97
98 /* We are about to unmount various file systems with MNT_DETACH (either explicitly via umount() or
99 * indirectly via pivot_root()), and thus do not synchronously wait for them to be fully sync'ed —
100 * all while making them invisible/inaccessible in the file system tree for later code. That makes
101 * sync'ing them then difficult. Let's hence issue a manual sync() here, so that we at least can
102 * guarantee all file systems are an a good state before entering this state. */
103 if (!FLAGS_SET(flags, SWITCH_ROOT_DONT_SYNC))
104 sync();
105
106 /* Work-around for kernel design: the kernel refuses MS_MOVE if any file systems are mounted
107 * MS_SHARED. Hence remount them MS_PRIVATE here as a work-around.
108 *
109 * https://bugzilla.redhat.com/show_bug.cgi?id=847418 */
110 if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0)
111 return log_error_errno(errno, "Failed to set \"/\" mount propagation to private: %m");
112
113 /* Do not fail if base_filesystem_create() fails. Not all switch roots are like base_filesystem_create() wants
114 * them to look like. They might even boot, if they are RO and don't have the FS layout. Just ignore the error
115 * and switch_root() nevertheless. */
116 (void) base_filesystem_create_fd(new_root_fd, new_root, UID_INVALID, GID_INVALID);
117
118 FOREACH_ARRAY(transfer, transfer_table, ELEMENTSOF(transfer_table)) {
119 _cleanup_free_ char *chased = NULL;
120
121 if (transfer->skip_if_run_is_rec && !FLAGS_SET(flags, SWITCH_ROOT_SKIP_RECURSIVE_RUN))
122 continue;
123
124 if (access(transfer->path, F_OK) < 0) {
125 log_debug_errno(errno, "Path '%s' to move to target root directory, not found, ignoring: %m", transfer->path);
126 continue;
127 }
128
129 r = chase(transfer->path, new_root, CHASE_PREFIX_ROOT, &chased, NULL);
130 if (r < 0)
131 return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, transfer->path);
132
133 /* Let's see if it is a mount point already. */
134 r = path_is_mount_point(chased, NULL, 0);
135 if (r < 0)
136 return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", chased);
137 if (r > 0) /* If it is already mounted, then do nothing */
138 continue;
139
140 r = mount_nofollow_verbose(LOG_ERR, transfer->path, chased, NULL, transfer->mount_flags, NULL);
141 if (r < 0)
142 return r;
143 }
144
145 if (fchdir(new_root_fd) < 0)
146 return log_error_errno(errno, "Failed to change directory to %s: %m", new_root);
147
148 /* We first try a pivot_root() so that we can umount the old root dir. In many cases (i.e. where rootfs is /),
149 * that's not possible however, and hence we simply overmount root */
150 if (resolved_old_root_after)
151 r = RET_NERRNO(pivot_root(".", resolved_old_root_after));
152 else {
153 r = RET_NERRNO(pivot_root(".", "."));
154 if (r >= 0) {
155 /* Now unmount the upper of the two stacked file systems */
156 if (umount2(".", MNT_DETACH) < 0)
157 return log_error_errno(errno, "Failed to unmount the old root: %m");
158 }
159 }
160 if (r < 0) {
161 log_debug_errno(r, "Pivoting root file system failed, moving mounts instead: %m");
162
163 /* If we have to use MS_MOVE let's first try to get rid of *all* mounts we can, with the
164 * exception of the path we want to switch to, plus everything leading to it and within
165 * it. This is necessary because unlike pivot_root() just moving the mount to the root via
166 * MS_MOVE won't magically unmount anything below it. Once the chroot() succeeds the mounts
167 * below would still be around but invisible to us, because not accessible via
168 * /proc/self/mountinfo. Hence, let's clean everything up first, as long as we still can. */
169 (void) umount_recursive_full(NULL, MNT_DETACH, STRV_MAKE(new_root));
170
171 if (mount(".", "/", NULL, MS_MOVE, NULL) < 0)
172 return log_error_errno(errno, "Failed to move %s to /: %m", new_root);
173
174 if (chroot(".") < 0)
175 return log_error_errno(errno, "Failed to change root: %m");
176
177 if (chdir(".") < 0)
178 return log_error_errno(errno, "Failed to change directory: %m");
179 }
180
181 if (istmp > 0) {
182 struct stat rb;
183
184 if (fstat(old_root_fd, &rb) < 0)
185 return log_error_errno(errno, "Failed to stat old root directory: %m");
186
187 /* Note: the below won't operate on non-memory file systems (i.e. only on tmpfs, ramfs), and
188 * it will stop at mount boundaries */
189 (void) rm_rf_children(TAKE_FD(old_root_fd), 0, &rb); /* takes possession of the dir fd, even on failure */
190 }
191
192 return 0;
193 }