]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/mount-setup.c
Merge branch 'python-systemd-reader'
[thirdparty/systemd.git] / src / core / mount-setup.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <errno.h>
24 #include <sys/stat.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <libgen.h>
28 #include <assert.h>
29 #include <unistd.h>
30 #include <ftw.h>
31
32 #include "mount-setup.h"
33 #include "dev-setup.h"
34 #include "log.h"
35 #include "macro.h"
36 #include "util.h"
37 #include "label.h"
38 #include "set.h"
39 #include "strv.h"
40 #include "mkdir.h"
41 #include "path-util.h"
42 #include "missing.h"
43 #include "virt.h"
44 #include "efivars.h"
45
/* Fallback numeric group id that is stringified into the "gid=" option
 * of the devpts entry in mount_table. A definition supplied earlier
 * (e.g. by the build) takes precedence over this default. */
#if !defined(TTY_GID)
#  define TTY_GID 5
#endif
49
/* Per-entry behaviour flags for MountPoint.mode; values may be OR-ed. */
typedef enum MountMode {
        /* No special handling. */
        MNT_NONE         = 0,
        /* A failure to mount this entry is reported as an error to the caller. */
        MNT_FATAL        = 1 << 0,
        /* The entry is also mounted when a container is detected. */
        MNT_IN_CONTAINER = 1 << 1,
} MountMode;
55
56 typedef struct MountPoint {
57 const char *what;
58 const char *where;
59 const char *type;
60 const char *options;
61 unsigned long flags;
62 bool (*condition_fn)(void);
63 MountMode mode;
64 } MountPoint;
65
66 /* The first three entries we might need before SELinux is up. The
67 * fourth (securityfs) is needed by IMA to load a custom policy. The
68 * other ones we can delay until SELinux and IMA are loaded. */
69 #define N_EARLY_MOUNT 4
70
71 static const MountPoint mount_table[] = {
72 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
73 NULL, MNT_FATAL|MNT_IN_CONTAINER },
74 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
75 NULL, MNT_FATAL|MNT_IN_CONTAINER },
76 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
77 NULL, MNT_FATAL|MNT_IN_CONTAINER },
78 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
79 NULL, MNT_NONE },
80 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
81 NULL, MNT_FATAL|MNT_IN_CONTAINER },
82 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
83 NULL, MNT_IN_CONTAINER },
84 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
85 NULL, MNT_FATAL|MNT_IN_CONTAINER },
86 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
87 NULL, MNT_IN_CONTAINER },
88 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
89 NULL, MNT_IN_CONTAINER },
90 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
91 NULL, MNT_NONE },
92 #ifdef ENABLE_EFI
93 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
94 is_efi_boot, MNT_NONE },
95 #endif
96 };
97
/* API file systems that other software might mount. They are only
 * listed here so that we know to ignore them. The list is a sequence
 * of NUL-terminated paths, ended by an empty string. */
static const char ignore_paths[] =
        "/sys/fs/selinux\0"     /* SELinux file system */
        "/selinux\0"            /* SELinux file system */
        "/dev/cgroup\0"         /* legacy cgroup mount point */
        "/cgroup\0"             /* legacy cgroup mount point */
        "/proc/bus/usb\0"       /* legacy kernel file system */
        "/proc/sys\0"           /* container bind mount */
        "/dev/console\0"        /* container bind mount */
        "/proc/kmsg\0";         /* container bind mount */
114
115 bool mount_point_is_api(const char *path) {
116 unsigned i;
117
118 /* Checks if this mount point is considered "API", and hence
119 * should be ignored */
120
121 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
122 if (path_equal(path, mount_table[i].where))
123 return true;
124
125 return path_startswith(path, "/sys/fs/cgroup/");
126 }
127
128 bool mount_point_ignore(const char *path) {
129 const char *i;
130
131 NULSTR_FOREACH(i, ignore_paths)
132 if (path_equal(path, i))
133 return true;
134
135 return false;
136 }
137
138 static int mount_one(const MountPoint *p, bool relabel) {
139 int r;
140
141 assert(p);
142
143 if (p->condition_fn && !p->condition_fn())
144 return 0;
145
146 /* Relabel first, just in case */
147 if (relabel)
148 label_fix(p->where, true, true);
149
150 r = path_is_mount_point(p->where, true);
151 if (r < 0)
152 return r;
153
154 if (r > 0)
155 return 0;
156
157 /* Skip securityfs in a container */
158 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
159 return 0;
160
161 /* The access mode here doesn't really matter too much, since
162 * the mounted file system will take precedence anyway. */
163 mkdir_p_label(p->where, 0755);
164
165 log_debug("Mounting %s to %s of type %s with options %s.",
166 p->what,
167 p->where,
168 p->type,
169 strna(p->options));
170
171 if (mount(p->what,
172 p->where,
173 p->type,
174 p->flags,
175 p->options) < 0) {
176 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %s", p->where, strerror(errno));
177 return (p->mode & MNT_FATAL) ? -errno : 0;
178 }
179
180 /* Relabel again, since we now mounted something fresh here */
181 if (relabel)
182 label_fix(p->where, false, false);
183
184 return 1;
185 }
186
187 int mount_setup_early(void) {
188 unsigned i;
189 int r = 0;
190
191 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
192
193 /* Do a minimal mount of /proc and friends to enable the most
194 * basic stuff, such as SELinux */
195 for (i = 0; i < N_EARLY_MOUNT; i ++) {
196 int j;
197
198 j = mount_one(mount_table + i, false);
199 if (r == 0)
200 r = j;
201 }
202
203 return r;
204 }
205
206 int mount_cgroup_controllers(char ***join_controllers) {
207 int r;
208 FILE *f;
209 char buf[LINE_MAX];
210 Set *controllers;
211
212 /* Mount all available cgroup controllers that are built into the kernel. */
213
214 f = fopen("/proc/cgroups", "re");
215 if (!f) {
216 log_error("Failed to enumerate cgroup controllers: %m");
217 return 0;
218 }
219
220 controllers = set_new(string_hash_func, string_compare_func);
221 if (!controllers) {
222 r = log_oom();
223 goto finish;
224 }
225
226 /* Ignore the header line */
227 (void) fgets(buf, sizeof(buf), f);
228
229 for (;;) {
230 char *controller;
231 int enabled = 0;
232
233 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
234
235 if (feof(f))
236 break;
237
238 log_error("Failed to parse /proc/cgroups.");
239 r = -EIO;
240 goto finish;
241 }
242
243 if (!enabled) {
244 free(controller);
245 continue;
246 }
247
248 r = set_put(controllers, controller);
249 if (r < 0) {
250 log_error("Failed to add controller to set.");
251 free(controller);
252 goto finish;
253 }
254 }
255
256 for (;;) {
257 MountPoint p;
258 char *controller, *where, *options;
259 char ***k = NULL;
260
261 controller = set_steal_first(controllers);
262 if (!controller)
263 break;
264
265 if (join_controllers)
266 for (k = join_controllers; *k; k++)
267 if (strv_find(*k, controller))
268 break;
269
270 if (k && *k) {
271 char **i, **j;
272
273 for (i = *k, j = *k; *i; i++) {
274
275 if (!streq(*i, controller)) {
276 char *t;
277
278 t = set_remove(controllers, *i);
279 if (!t) {
280 free(*i);
281 continue;
282 }
283 free(t);
284 }
285
286 *(j++) = *i;
287 }
288
289 *j = NULL;
290
291 options = strv_join(*k, ",");
292 if (!options) {
293 free(controller);
294 r = log_oom();
295 goto finish;
296 }
297
298 } else {
299 options = controller;
300 controller = NULL;
301 }
302
303 where = strappend("/sys/fs/cgroup/", options);
304 if (!where) {
305 free(options);
306 r = log_oom();
307 goto finish;
308 }
309
310 zero(p);
311 p.what = "cgroup";
312 p.where = where;
313 p.type = "cgroup";
314 p.options = options;
315 p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
316
317 r = mount_one(&p, true);
318 free(controller);
319 free(where);
320
321 if (r < 0) {
322 free(options);
323 goto finish;
324 }
325
326 if (r > 0 && k && *k) {
327 char **i;
328
329 for (i = *k; *i; i++) {
330 char *t;
331
332 t = strappend("/sys/fs/cgroup/", *i);
333 if (!t) {
334 r = log_oom();
335 free(options);
336 goto finish;
337 }
338
339 r = symlink(options, t);
340 free(t);
341
342 if (r < 0 && errno != EEXIST) {
343 log_error("Failed to create symlink: %m");
344 r = -errno;
345 free(options);
346 goto finish;
347 }
348 }
349 }
350
351 free(options);
352 }
353
354 r = 0;
355
356 finish:
357 set_free_free(controllers);
358
359 fclose(f);
360
361 return r;
362 }
363
364 static int nftw_cb(
365 const char *fpath,
366 const struct stat *sb,
367 int tflag,
368 struct FTW *ftwbuf) {
369
370 /* No need to label /dev twice in a row... */
371 if (_unlikely_(ftwbuf->level == 0))
372 return FTW_CONTINUE;
373
374 label_fix(fpath, false, false);
375
376 /* /run/initramfs is static data and big, no need to
377 * dynamically relabel its contents at boot... */
378 if (_unlikely_(ftwbuf->level == 1 &&
379 tflag == FTW_D &&
380 streq(fpath, "/run/initramfs")))
381 return FTW_SKIP_SUBTREE;
382
383 return FTW_CONTINUE;
384 };
385
386 int mount_setup(bool loaded_policy) {
387
388 static const char relabel[] =
389 "/run/initramfs/root-fsck\0"
390 "/run/initramfs/shutdown\0";
391
392 int r;
393 unsigned i;
394 const char *j;
395
396 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
397 r = mount_one(mount_table + i, true);
398
399 if (r < 0)
400 return r;
401 }
402
403 /* Nodes in devtmpfs and /run need to be manually updated for
404 * the appropriate labels, after mounting. The other virtual
405 * API file systems like /sys and /proc do not need that, they
406 * use the same label for all their files. */
407 if (loaded_policy) {
408 usec_t before_relabel, after_relabel;
409 char timespan[FORMAT_TIMESPAN_MAX];
410
411 before_relabel = now(CLOCK_MONOTONIC);
412
413 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
414 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
415
416 /* Explicitly relabel these */
417 NULSTR_FOREACH(j, relabel)
418 label_fix(j, true, false);
419
420 after_relabel = now(CLOCK_MONOTONIC);
421
422 log_info("Relabelled /dev and /run in %s.",
423 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel));
424 }
425
426 /* Create a few default symlinks, which are normally created
427 * by udevd, but some scripts might need them before we start
428 * udevd. */
429 dev_setup(NULL);
430
431 /* Mark the root directory as shared in regards to mount
432 * propagation. The kernel defaults to "private", but we think
433 * it makes more sense to have a default of "shared" so that
434 * nspawn and the container tools work out of the box. If
435 * specific setups need other settings they can reset the
436 * propagation mode to private if needed. */
437 if (detect_container(NULL) <= 0)
438 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
439 log_warning("Failed to set up the root directory for shared mount propagation: %m");
440
441 /* Create a few directories we always want around */
442 mkdir_label("/run/systemd", 0755);
443 mkdir_label("/run/systemd/system", 0755);
444
445 return 0;
446 }