]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/mount-setup.c
build-sys: make EFI support build-time optional
[thirdparty/systemd.git] / src / core / mount-setup.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <errno.h>
24 #include <sys/stat.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <libgen.h>
28 #include <assert.h>
29 #include <unistd.h>
30 #include <ftw.h>
31
32 #include "mount-setup.h"
33 #include "dev-setup.h"
34 #include "log.h"
35 #include "macro.h"
36 #include "util.h"
37 #include "label.h"
38 #include "set.h"
39 #include "strv.h"
40 #include "mkdir.h"
41 #include "path-util.h"
42 #include "missing.h"
43 #include "virt.h"
44 #include "efivars.h"
45
/* Fallback group id for the "tty" group; it is stringified into the
 * "gid=" option of the devpts entry in mount_table. The build system
 * may predefine a different value. */
#if !defined(TTY_GID)
#  define TTY_GID 5
#endif
49
/* Per-entry behaviour flags for MountPoint.mode; may be OR-ed together. */
typedef enum MountMode {
        /* No special handling: a failed mount is only logged at debug
         * level and the entry is skipped when running in a container. */
        MNT_NONE         = 0,

        /* A failed mount is logged as an error and reported to the
         * caller as a negative errno value. */
        MNT_FATAL        = 1 << 0,

        /* The entry is also mounted when a container is detected. */
        MNT_IN_CONTAINER = 1 << 1,
} MountMode;
55
56 typedef struct MountPoint {
57 const char *what;
58 const char *where;
59 const char *type;
60 const char *options;
61 unsigned long flags;
62 bool (*condition_fn)(void);
63 MountMode mode;
64 } MountPoint;
65
66 /* The first three entries we might need before SELinux is up. The
67 * fourth (securityfs) is needed by IMA to load a custom policy. The
68 * other ones we can delay until SELinux and IMA are loaded. */
69 #define N_EARLY_MOUNT 4
70
71 static const MountPoint mount_table[] = {
72 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
73 NULL, MNT_FATAL|MNT_IN_CONTAINER },
74 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
75 NULL, MNT_FATAL|MNT_IN_CONTAINER },
76 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
77 NULL, MNT_FATAL|MNT_IN_CONTAINER },
78 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
79 NULL, MNT_NONE },
80 #ifdef ENABLE_EFI
81 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
82 is_efi_boot, MNT_NONE },
83 #endif
84 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
85 NULL, MNT_FATAL|MNT_IN_CONTAINER },
86 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
87 NULL, MNT_IN_CONTAINER },
88 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
89 NULL, MNT_FATAL|MNT_IN_CONTAINER },
90 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
91 NULL, MNT_IN_CONTAINER },
92 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
93 NULL, MNT_IN_CONTAINER },
94 };
95
/* API file systems that other software might mount. They are only
 * listed here so that we know to ignore them. The list is a sequence
 * of NUL-terminated strings, to be iterated with NULSTR_FOREACH(). */
static const char ignore_paths[] =
        /* SELinux file systems */
        "/sys/fs/selinux\0"
        "/selinux\0"

        /* Legacy cgroup mount points */
        "/dev/cgroup\0"
        "/cgroup\0"

        /* Legacy kernel file system */
        "/proc/bus/usb\0"

        /* Container bind mounts */
        "/proc/sys\0"
        "/dev/console\0"
        "/proc/kmsg\0";
112
113 bool mount_point_is_api(const char *path) {
114 unsigned i;
115
116 /* Checks if this mount point is considered "API", and hence
117 * should be ignored */
118
119 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
120 if (path_equal(path, mount_table[i].where))
121 return true;
122
123 return path_startswith(path, "/sys/fs/cgroup/");
124 }
125
126 bool mount_point_ignore(const char *path) {
127 const char *i;
128
129 NULSTR_FOREACH(i, ignore_paths)
130 if (path_equal(path, i))
131 return true;
132
133 return false;
134 }
135
136 static int mount_one(const MountPoint *p, bool relabel) {
137 int r;
138
139 assert(p);
140
141 if (p->condition_fn && !p->condition_fn())
142 return 0;
143
144 /* Relabel first, just in case */
145 if (relabel)
146 label_fix(p->where, true, true);
147
148 r = path_is_mount_point(p->where, true);
149 if (r < 0)
150 return r;
151
152 if (r > 0)
153 return 0;
154
155 /* Skip securityfs in a container */
156 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
157 return 0;
158
159 /* The access mode here doesn't really matter too much, since
160 * the mounted file system will take precedence anyway. */
161 mkdir_p_label(p->where, 0755);
162
163 log_debug("Mounting %s to %s of type %s with options %s.",
164 p->what,
165 p->where,
166 p->type,
167 strna(p->options));
168
169 if (mount(p->what,
170 p->where,
171 p->type,
172 p->flags,
173 p->options) < 0) {
174 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %s", p->where, strerror(errno));
175 return (p->mode & MNT_FATAL) ? -errno : 0;
176 }
177
178 /* Relabel again, since we now mounted something fresh here */
179 if (relabel)
180 label_fix(p->where, false, false);
181
182 return 1;
183 }
184
185 int mount_setup_early(void) {
186 unsigned i;
187 int r = 0;
188
189 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
190
191 /* Do a minimal mount of /proc and friends to enable the most
192 * basic stuff, such as SELinux */
193 for (i = 0; i < N_EARLY_MOUNT; i ++) {
194 int j;
195
196 j = mount_one(mount_table + i, false);
197 if (r == 0)
198 r = j;
199 }
200
201 return r;
202 }
203
204 int mount_cgroup_controllers(char ***join_controllers) {
205 int r;
206 FILE *f;
207 char buf[LINE_MAX];
208 Set *controllers;
209
210 /* Mount all available cgroup controllers that are built into the kernel. */
211
212 f = fopen("/proc/cgroups", "re");
213 if (!f) {
214 log_error("Failed to enumerate cgroup controllers: %m");
215 return 0;
216 }
217
218 controllers = set_new(string_hash_func, string_compare_func);
219 if (!controllers) {
220 r = log_oom();
221 goto finish;
222 }
223
224 /* Ignore the header line */
225 (void) fgets(buf, sizeof(buf), f);
226
227 for (;;) {
228 char *controller;
229 int enabled = 0;
230
231 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
232
233 if (feof(f))
234 break;
235
236 log_error("Failed to parse /proc/cgroups.");
237 r = -EIO;
238 goto finish;
239 }
240
241 if (!enabled) {
242 free(controller);
243 continue;
244 }
245
246 r = set_put(controllers, controller);
247 if (r < 0) {
248 log_error("Failed to add controller to set.");
249 free(controller);
250 goto finish;
251 }
252 }
253
254 for (;;) {
255 MountPoint p;
256 char *controller, *where, *options;
257 char ***k = NULL;
258
259 controller = set_steal_first(controllers);
260 if (!controller)
261 break;
262
263 if (join_controllers)
264 for (k = join_controllers; *k; k++)
265 if (strv_find(*k, controller))
266 break;
267
268 if (k && *k) {
269 char **i, **j;
270
271 for (i = *k, j = *k; *i; i++) {
272
273 if (!streq(*i, controller)) {
274 char *t;
275
276 t = set_remove(controllers, *i);
277 if (!t) {
278 free(*i);
279 continue;
280 }
281 free(t);
282 }
283
284 *(j++) = *i;
285 }
286
287 *j = NULL;
288
289 options = strv_join(*k, ",");
290 if (!options) {
291 free(controller);
292 r = log_oom();
293 goto finish;
294 }
295
296 } else {
297 options = controller;
298 controller = NULL;
299 }
300
301 where = strappend("/sys/fs/cgroup/", options);
302 if (!where) {
303 free(options);
304 r = log_oom();
305 goto finish;
306 }
307
308 zero(p);
309 p.what = "cgroup";
310 p.where = where;
311 p.type = "cgroup";
312 p.options = options;
313 p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
314
315 r = mount_one(&p, true);
316 free(controller);
317 free(where);
318
319 if (r < 0) {
320 free(options);
321 goto finish;
322 }
323
324 if (r > 0 && k && *k) {
325 char **i;
326
327 for (i = *k; *i; i++) {
328 char *t;
329
330 t = strappend("/sys/fs/cgroup/", *i);
331 if (!t) {
332 r = log_oom();
333 free(options);
334 goto finish;
335 }
336
337 r = symlink(options, t);
338 free(t);
339
340 if (r < 0 && errno != EEXIST) {
341 log_error("Failed to create symlink: %m");
342 r = -errno;
343 free(options);
344 goto finish;
345 }
346 }
347 }
348
349 free(options);
350 }
351
352 r = 0;
353
354 finish:
355 set_free_free(controllers);
356
357 fclose(f);
358
359 return r;
360 }
361
362 static int nftw_cb(
363 const char *fpath,
364 const struct stat *sb,
365 int tflag,
366 struct FTW *ftwbuf) {
367
368 /* No need to label /dev twice in a row... */
369 if (_unlikely_(ftwbuf->level == 0))
370 return FTW_CONTINUE;
371
372 label_fix(fpath, false, false);
373
374 /* /run/initramfs is static data and big, no need to
375 * dynamically relabel its contents at boot... */
376 if (_unlikely_(ftwbuf->level == 1 &&
377 tflag == FTW_D &&
378 streq(fpath, "/run/initramfs")))
379 return FTW_SKIP_SUBTREE;
380
381 return FTW_CONTINUE;
382 };
383
384 int mount_setup(bool loaded_policy) {
385
386 static const char relabel[] =
387 "/run/initramfs/root-fsck\0"
388 "/run/initramfs/shutdown\0";
389
390 int r;
391 unsigned i;
392 const char *j;
393
394 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
395 r = mount_one(mount_table + i, true);
396
397 if (r < 0)
398 return r;
399 }
400
401 /* Nodes in devtmpfs and /run need to be manually updated for
402 * the appropriate labels, after mounting. The other virtual
403 * API file systems like /sys and /proc do not need that, they
404 * use the same label for all their files. */
405 if (loaded_policy) {
406 usec_t before_relabel, after_relabel;
407 char timespan[FORMAT_TIMESPAN_MAX];
408
409 before_relabel = now(CLOCK_MONOTONIC);
410
411 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
412 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
413
414 /* Explicitly relabel these */
415 NULSTR_FOREACH(j, relabel)
416 label_fix(j, true, false);
417
418 after_relabel = now(CLOCK_MONOTONIC);
419
420 log_info("Relabelled /dev and /run in %s.",
421 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel));
422 }
423
424 /* Create a few default symlinks, which are normally created
425 * by udevd, but some scripts might need them before we start
426 * udevd. */
427 dev_setup(NULL);
428
429 /* Mark the root directory as shared in regards to mount
430 * propagation. The kernel defaults to "private", but we think
431 * it makes more sense to have a default of "shared" so that
432 * nspawn and the container tools work out of the box. If
433 * specific setups need other settings they can reset the
434 * propagation mode to private if needed. */
435 if (detect_container(NULL) <= 0)
436 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
437 log_warning("Failed to set up the root directory for shared mount propagation: %m");
438
439 /* Create a few directories we always want around */
440 mkdir_label("/run/systemd", 0755);
441 mkdir_label("/run/systemd/system", 0755);
442
443 return 0;
444 }