]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/mount-setup.c
core: fix missing bus-util.h include
[thirdparty/systemd.git] / src / core / mount-setup.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <errno.h>
24 #include <stdlib.h>
25 #include <unistd.h>
26 #include <ftw.h>
27
28 #include "mount-setup.h"
29 #include "dev-setup.h"
30 #include "bus-util.h"
31 #include "log.h"
32 #include "macro.h"
33 #include "util.h"
34 #include "label.h"
35 #include "set.h"
36 #include "strv.h"
37 #include "mkdir.h"
38 #include "path-util.h"
39 #include "missing.h"
40 #include "virt.h"
41 #include "efivars.h"
42 #include "smack-util.h"
43 #include "cgroup-util.h"
44
/* Per-entry behaviour flags of a MountPoint; values are bits and may be
 * combined with bitwise OR. */
typedef enum MountMode {
        /* No special handling. */
        MNT_NONE         = 0x0,
        /* A failing mount() is logged at error level and reported to
         * the caller of mount_one() as a negative errno. */
        MNT_FATAL        = 0x1,
        /* The entry is also mounted when running inside a container;
         * entries without this bit are skipped there. */
        MNT_IN_CONTAINER = 0x2,
} MountMode;
50
51 typedef struct MountPoint {
52 const char *what;
53 const char *where;
54 const char *type;
55 const char *options;
56 unsigned long flags;
57 bool (*condition_fn)(void);
58 MountMode mode;
59 } MountPoint;
60
/* Number of leading mount_table entries handled by mount_setup_early().
 *
 * The first three entries may be needed before SELinux is up, and the
 * fourth (securityfs) is needed by IMA to load a custom policy. All
 * remaining entries can wait until SELinux and IMA are loaded. With
 * SMACK enabled smackfs is required early as well, which makes it a
 * fifth early entry. */
#if defined(HAVE_SMACK)
#  define N_EARLY_MOUNT 5
#else
#  define N_EARLY_MOUNT 4
#endif
70
71 static const MountPoint mount_table[] = {
72 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
73 NULL, MNT_FATAL|MNT_IN_CONTAINER },
74 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
75 NULL, MNT_FATAL|MNT_IN_CONTAINER },
76 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
77 NULL, MNT_FATAL|MNT_IN_CONTAINER },
78 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
79 NULL, MNT_NONE },
80 #ifdef HAVE_SMACK
81 { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV,
82 mac_smack_use, MNT_FATAL },
83 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
84 mac_smack_use, MNT_FATAL },
85 #endif
86 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
87 NULL, MNT_FATAL|MNT_IN_CONTAINER },
88 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
89 NULL, MNT_IN_CONTAINER },
90 #ifdef HAVE_SMACK
91 { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
92 mac_smack_use, MNT_FATAL },
93 #endif
94 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
95 NULL, MNT_FATAL|MNT_IN_CONTAINER },
96 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
97 NULL, MNT_FATAL|MNT_IN_CONTAINER },
98 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
99 NULL, MNT_IN_CONTAINER },
100 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
101 NULL, MNT_FATAL|MNT_IN_CONTAINER },
102 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
103 NULL, MNT_NONE },
104 #ifdef ENABLE_EFI
105 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
106 is_efi_boot, MNT_NONE },
107 #endif
108 { "kdbusfs", "/sys/fs/kdbus", "kdbusfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
109 is_kdbus_wanted, MNT_IN_CONTAINER },
110 };
111
/* API file systems and bind mounts that other software may have set
 * up. They are only listed so that we know to ignore them. The list is
 * a sequence of NUL-separated paths, closed by the literal's implicit
 * trailing NUL. */
static const char ignore_paths[] =
        "/sys/fs/selinux\0"     /* SELinux file systems */
        "/proc/sys\0"           /* Container bind mounts */
        "/dev/console\0"
        "/proc/kmsg\0";
122
123 bool mount_point_is_api(const char *path) {
124 unsigned i;
125
126 /* Checks if this mount point is considered "API", and hence
127 * should be ignored */
128
129 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
130 if (path_equal(path, mount_table[i].where))
131 return true;
132
133 return path_startswith(path, "/sys/fs/cgroup/");
134 }
135
136 bool mount_point_ignore(const char *path) {
137 const char *i;
138
139 NULSTR_FOREACH(i, ignore_paths)
140 if (path_equal(path, i))
141 return true;
142
143 return false;
144 }
145
146 static int mount_one(const MountPoint *p, bool relabel) {
147 int r;
148
149 assert(p);
150
151 if (p->condition_fn && !p->condition_fn())
152 return 0;
153
154 /* Relabel first, just in case */
155 if (relabel)
156 label_fix(p->where, true, true);
157
158 r = path_is_mount_point(p->where, AT_SYMLINK_FOLLOW);
159 if (r < 0 && r != -ENOENT)
160 return r;
161 if (r > 0)
162 return 0;
163
164 /* Skip securityfs in a container */
165 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
166 return 0;
167
168 /* The access mode here doesn't really matter too much, since
169 * the mounted file system will take precedence anyway. */
170 if (relabel)
171 mkdir_p_label(p->where, 0755);
172 else
173 mkdir_p(p->where, 0755);
174
175 log_debug("Mounting %s to %s of type %s with options %s.",
176 p->what,
177 p->where,
178 p->type,
179 strna(p->options));
180
181 if (mount(p->what,
182 p->where,
183 p->type,
184 p->flags,
185 p->options) < 0) {
186 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s at %s: %m", p->type, p->where);
187 return (p->mode & MNT_FATAL) ? -errno : 0;
188 }
189
190 /* Relabel again, since we now mounted something fresh here */
191 if (relabel)
192 label_fix(p->where, false, false);
193
194 return 1;
195 }
196
197 int mount_setup_early(void) {
198 unsigned i;
199 int r = 0;
200
201 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
202
203 /* Do a minimal mount of /proc and friends to enable the most
204 * basic stuff, such as SELinux */
205 for (i = 0; i < N_EARLY_MOUNT; i ++) {
206 int j;
207
208 j = mount_one(mount_table + i, false);
209 if (r == 0)
210 r = j;
211 }
212
213 return r;
214 }
215
216 int mount_cgroup_controllers(char ***join_controllers) {
217 _cleanup_set_free_free_ Set *controllers = NULL;
218 int r;
219
220 /* Mount all available cgroup controllers that are built into the kernel. */
221
222 controllers = set_new(&string_hash_ops);
223 if (!controllers)
224 return log_oom();
225
226 r = cg_kernel_controllers(controllers);
227 if (r < 0)
228 return log_error_errno(r, "Failed to enumerate cgroup controllers: %m");
229
230 for (;;) {
231 _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
232 MountPoint p = {
233 .what = "cgroup",
234 .type = "cgroup",
235 .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
236 .mode = MNT_IN_CONTAINER,
237 };
238 char ***k = NULL;
239
240 controller = set_steal_first(controllers);
241 if (!controller)
242 break;
243
244 if (join_controllers)
245 for (k = join_controllers; *k; k++)
246 if (strv_find(*k, controller))
247 break;
248
249 if (k && *k) {
250 char **i, **j;
251
252 for (i = *k, j = *k; *i; i++) {
253
254 if (!streq(*i, controller)) {
255 _cleanup_free_ char *t;
256
257 t = set_remove(controllers, *i);
258 if (!t) {
259 free(*i);
260 continue;
261 }
262 }
263
264 *(j++) = *i;
265 }
266
267 *j = NULL;
268
269 options = strv_join(*k, ",");
270 if (!options)
271 return log_oom();
272 } else {
273 options = controller;
274 controller = NULL;
275 }
276
277 where = strappend("/sys/fs/cgroup/", options);
278 if (!where)
279 return log_oom();
280
281 p.where = where;
282 p.options = options;
283
284 r = mount_one(&p, true);
285 if (r < 0)
286 return r;
287
288 if (r > 0 && k && *k) {
289 char **i;
290
291 for (i = *k; *i; i++) {
292 _cleanup_free_ char *t = NULL;
293
294 t = strappend("/sys/fs/cgroup/", *i);
295 if (!t)
296 return log_oom();
297
298 r = symlink(options, t);
299 if (r < 0 && errno != EEXIST)
300 return log_error_errno(errno, "Failed to create symlink %s: %m", t);
301 }
302 }
303 }
304
305 /* Now that we mounted everything, let's make the tmpfs the
306 * cgroup file systems are mounted into read-only. */
307 (void) mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
308
309 return 0;
310 }
311
#if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
/* nftw() callback that fixes the security label of every visited entry
 * except the walk's root.
 *
 * fpath:  path of the visited entry.
 * sb:     stat data of the entry (unused).
 * tflag:  nftw() type flag of the entry.
 * ftwbuf: nftw() state; only the nesting level is consulted.
 *
 * Returns FTW_SKIP_SUBTREE for the directory /run/initramfs and
 * FTW_CONTINUE otherwise, so the walk has to be started with
 * FTW_ACTIONRETVAL. */
static int nftw_cb(
                const char *fpath,
                const struct stat *sb,
                int tflag,
                struct FTW *ftwbuf) {

        /* No need to label /dev twice in a row... */
        if (_unlikely_(ftwbuf->level == 0))
                return FTW_CONTINUE;

        label_fix(fpath, false, false);

        /* /run/initramfs is static data and big, no need to
         * dynamically relabel its contents at boot... */
        if (_unlikely_(ftwbuf->level == 1 &&
                       tflag == FTW_D &&
                       streq(fpath, "/run/initramfs")))
                return FTW_SKIP_SUBTREE;

        return FTW_CONTINUE;
}
#endif
335
336 int mount_setup(bool loaded_policy) {
337 unsigned i;
338 int r = 0;
339
340 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
341 int j;
342
343 j = mount_one(mount_table + i, loaded_policy);
344 if (r == 0)
345 r = j;
346 }
347
348 if (r < 0)
349 return r;
350
351 #if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
352 /* Nodes in devtmpfs and /run need to be manually updated for
353 * the appropriate labels, after mounting. The other virtual
354 * API file systems like /sys and /proc do not need that, they
355 * use the same label for all their files. */
356 if (loaded_policy) {
357 usec_t before_relabel, after_relabel;
358 char timespan[FORMAT_TIMESPAN_MAX];
359
360 before_relabel = now(CLOCK_MONOTONIC);
361
362 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
363 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
364
365 after_relabel = now(CLOCK_MONOTONIC);
366
367 log_info("Relabelled /dev and /run in %s.",
368 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
369 }
370 #endif
371
372 /* Create a few default symlinks, which are normally created
373 * by udevd, but some scripts might need them before we start
374 * udevd. */
375 dev_setup(NULL, UID_INVALID, GID_INVALID);
376
377 /* Mark the root directory as shared in regards to mount
378 * propagation. The kernel defaults to "private", but we think
379 * it makes more sense to have a default of "shared" so that
380 * nspawn and the container tools work out of the box. If
381 * specific setups need other settings they can reset the
382 * propagation mode to private if needed. */
383 if (detect_container(NULL) <= 0)
384 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
385 log_warning_errno(errno, "Failed to set up the root directory for shared mount propagation: %m");
386
387 /* Create a few directories we always want around, Note that
388 * sd_booted() checks for /run/systemd/system, so this mkdir
389 * really needs to stay for good, otherwise software that
390 * copied sd-daemon.c into their sources will misdetect
391 * systemd. */
392 mkdir_label("/run/systemd", 0755);
393 mkdir_label("/run/systemd/system", 0755);
394 mkdir_label("/run/systemd/inaccessible", 0000);
395
396 return 0;
397 }