]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/mount-setup.c
Merge branch 'master' of ssh://git.freedesktop.org/git/systemd/systemd into work
[thirdparty/systemd.git] / src / core / mount-setup.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <errno.h>
24 #include <sys/stat.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <libgen.h>
28 #include <assert.h>
29 #include <unistd.h>
30 #include <ftw.h>
31
32 #include "mount-setup.h"
33 #include "dev-setup.h"
34 #include "log.h"
35 #include "macro.h"
36 #include "util.h"
37 #include "label.h"
38 #include "set.h"
39 #include "strv.h"
40 #include "mkdir.h"
41 #include "path-util.h"
42 #include "missing.h"
43 #include "virt.h"
44 #include "efivars.h"
45
/* Fallback for the group ID that is passed as gid= option when
 * mounting devpts, in case the build did not define one already. */
#if !defined(TTY_GID)
#  define TTY_GID 5
#endif
49
/* Per-entry behaviour flags for MountPoint.mode; may be OR-ed together. */
typedef enum MountMode {
        /* No special handling */
        MNT_NONE         = 0,

        /* A failure to mount this entry is logged as an error and
         * reported to the caller instead of being ignored */
        MNT_FATAL        = 1 << 0,

        /* Mount this entry even when running inside a container;
         * entries lacking this flag are skipped there */
        MNT_IN_CONTAINER = 1 << 1,
} MountMode;
55
56 typedef struct MountPoint {
57 const char *what;
58 const char *where;
59 const char *type;
60 const char *options;
61 unsigned long flags;
62 bool (*condition_fn)(void);
63 MountMode mode;
64 } MountPoint;
65
/* Number of leading mount_table entries that mount_setup_early()
 * takes care of. The first three entries we might need before SELinux
 * is up, the fourth one (securityfs) is needed by IMA to load a custom
 * policy. All remaining entries can be delayed until SELinux and IMA
 * are loaded. */
#define N_EARLY_MOUNT 4
70
71 static const MountPoint mount_table[] = {
72 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
73 NULL, MNT_FATAL|MNT_IN_CONTAINER },
74 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
75 NULL, MNT_FATAL|MNT_IN_CONTAINER },
76 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
77 NULL, MNT_FATAL|MNT_IN_CONTAINER },
78 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
79 NULL, MNT_NONE },
80 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
81 is_efi_boot, MNT_NONE },
82 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
83 NULL, MNT_FATAL|MNT_IN_CONTAINER },
84 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
85 NULL, MNT_IN_CONTAINER },
86 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
87 NULL, MNT_FATAL|MNT_IN_CONTAINER },
88 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
89 NULL, MNT_IN_CONTAINER },
90 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
91 NULL, MNT_IN_CONTAINER },
92 };
93
/* API file systems that other software might mount. We do not mount
 * them ourselves, we merely list them so that we know they are to be
 * ignored. The list is a sequence of NUL-terminated paths. */

static const char ignore_paths[] =
        "/sys/fs/selinux\0"     /* SELinux file systems */
        "/selinux\0"
        "/dev/cgroup\0"         /* Legacy cgroup mount points */
        "/cgroup\0"
        "/proc/bus/usb\0"       /* Legacy kernel file system */
        "/proc/sys\0"           /* Container bind mounts */
        "/dev/console\0"
        "/proc/kmsg\0";
110
111 bool mount_point_is_api(const char *path) {
112 unsigned i;
113
114 /* Checks if this mount point is considered "API", and hence
115 * should be ignored */
116
117 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
118 if (path_equal(path, mount_table[i].where))
119 return true;
120
121 return path_startswith(path, "/sys/fs/cgroup/");
122 }
123
124 bool mount_point_ignore(const char *path) {
125 const char *i;
126
127 NULSTR_FOREACH(i, ignore_paths)
128 if (path_equal(path, i))
129 return true;
130
131 return false;
132 }
133
134 static int mount_one(const MountPoint *p, bool relabel) {
135 int r;
136
137 assert(p);
138
139 if (p->condition_fn && !p->condition_fn())
140 return 0;
141
142 /* Relabel first, just in case */
143 if (relabel)
144 label_fix(p->where, true, true);
145
146 r = path_is_mount_point(p->where, true);
147 if (r < 0)
148 return r;
149
150 if (r > 0)
151 return 0;
152
153 /* Skip securityfs in a container */
154 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
155 return 0;
156
157 /* The access mode here doesn't really matter too much, since
158 * the mounted file system will take precedence anyway. */
159 mkdir_p_label(p->where, 0755);
160
161 log_debug("Mounting %s to %s of type %s with options %s.",
162 p->what,
163 p->where,
164 p->type,
165 strna(p->options));
166
167 if (mount(p->what,
168 p->where,
169 p->type,
170 p->flags,
171 p->options) < 0) {
172 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %s", p->where, strerror(errno));
173 return (p->mode & MNT_FATAL) ? -errno : 0;
174 }
175
176 /* Relabel again, since we now mounted something fresh here */
177 if (relabel)
178 label_fix(p->where, false, false);
179
180 return 1;
181 }
182
183 int mount_setup_early(void) {
184 unsigned i;
185 int r = 0;
186
187 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
188
189 /* Do a minimal mount of /proc and friends to enable the most
190 * basic stuff, such as SELinux */
191 for (i = 0; i < N_EARLY_MOUNT; i ++) {
192 int j;
193
194 j = mount_one(mount_table + i, false);
195 if (r == 0)
196 r = j;
197 }
198
199 return r;
200 }
201
202 int mount_cgroup_controllers(char ***join_controllers) {
203 int r;
204 FILE *f;
205 char buf[LINE_MAX];
206 Set *controllers;
207
208 /* Mount all available cgroup controllers that are built into the kernel. */
209
210 f = fopen("/proc/cgroups", "re");
211 if (!f) {
212 log_error("Failed to enumerate cgroup controllers: %m");
213 return 0;
214 }
215
216 controllers = set_new(string_hash_func, string_compare_func);
217 if (!controllers) {
218 r = log_oom();
219 goto finish;
220 }
221
222 /* Ignore the header line */
223 (void) fgets(buf, sizeof(buf), f);
224
225 for (;;) {
226 char *controller;
227 int enabled = 0;
228
229 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
230
231 if (feof(f))
232 break;
233
234 log_error("Failed to parse /proc/cgroups.");
235 r = -EIO;
236 goto finish;
237 }
238
239 if (!enabled) {
240 free(controller);
241 continue;
242 }
243
244 r = set_put(controllers, controller);
245 if (r < 0) {
246 log_error("Failed to add controller to set.");
247 free(controller);
248 goto finish;
249 }
250 }
251
252 for (;;) {
253 MountPoint p;
254 char *controller, *where, *options;
255 char ***k = NULL;
256
257 controller = set_steal_first(controllers);
258 if (!controller)
259 break;
260
261 if (join_controllers)
262 for (k = join_controllers; *k; k++)
263 if (strv_find(*k, controller))
264 break;
265
266 if (k && *k) {
267 char **i, **j;
268
269 for (i = *k, j = *k; *i; i++) {
270
271 if (!streq(*i, controller)) {
272 char *t;
273
274 t = set_remove(controllers, *i);
275 if (!t) {
276 free(*i);
277 continue;
278 }
279 free(t);
280 }
281
282 *(j++) = *i;
283 }
284
285 *j = NULL;
286
287 options = strv_join(*k, ",");
288 if (!options) {
289 free(controller);
290 r = log_oom();
291 goto finish;
292 }
293
294 } else {
295 options = controller;
296 controller = NULL;
297 }
298
299 where = strappend("/sys/fs/cgroup/", options);
300 if (!where) {
301 free(options);
302 r = log_oom();
303 goto finish;
304 }
305
306 zero(p);
307 p.what = "cgroup";
308 p.where = where;
309 p.type = "cgroup";
310 p.options = options;
311 p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
312
313 r = mount_one(&p, true);
314 free(controller);
315 free(where);
316
317 if (r < 0) {
318 free(options);
319 goto finish;
320 }
321
322 if (r > 0 && k && *k) {
323 char **i;
324
325 for (i = *k; *i; i++) {
326 char *t;
327
328 t = strappend("/sys/fs/cgroup/", *i);
329 if (!t) {
330 r = log_oom();
331 free(options);
332 goto finish;
333 }
334
335 r = symlink(options, t);
336 free(t);
337
338 if (r < 0 && errno != EEXIST) {
339 log_error("Failed to create symlink: %m");
340 r = -errno;
341 free(options);
342 goto finish;
343 }
344 }
345 }
346
347 free(options);
348 }
349
350 r = 0;
351
352 finish:
353 set_free_free(controllers);
354
355 fclose(f);
356
357 return r;
358 }
359
360 static int nftw_cb(
361 const char *fpath,
362 const struct stat *sb,
363 int tflag,
364 struct FTW *ftwbuf) {
365
366 /* No need to label /dev twice in a row... */
367 if (_unlikely_(ftwbuf->level == 0))
368 return FTW_CONTINUE;
369
370 label_fix(fpath, false, false);
371
372 /* /run/initramfs is static data and big, no need to
373 * dynamically relabel its contents at boot... */
374 if (_unlikely_(ftwbuf->level == 1 &&
375 tflag == FTW_D &&
376 streq(fpath, "/run/initramfs")))
377 return FTW_SKIP_SUBTREE;
378
379 return FTW_CONTINUE;
380 };
381
382 int mount_setup(bool loaded_policy) {
383
384 static const char relabel[] =
385 "/run/initramfs/root-fsck\0"
386 "/run/initramfs/shutdown\0";
387
388 int r;
389 unsigned i;
390 const char *j;
391
392 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
393 r = mount_one(mount_table + i, true);
394
395 if (r < 0)
396 return r;
397 }
398
399 /* Nodes in devtmpfs and /run need to be manually updated for
400 * the appropriate labels, after mounting. The other virtual
401 * API file systems like /sys and /proc do not need that, they
402 * use the same label for all their files. */
403 if (loaded_policy) {
404 usec_t before_relabel, after_relabel;
405 char timespan[FORMAT_TIMESPAN_MAX];
406
407 before_relabel = now(CLOCK_MONOTONIC);
408
409 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
410 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
411
412 /* Explicitly relabel these */
413 NULSTR_FOREACH(j, relabel)
414 label_fix(j, true, false);
415
416 after_relabel = now(CLOCK_MONOTONIC);
417
418 log_info("Relabelled /dev and /run in %s.",
419 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel));
420 }
421
422 /* Create a few default symlinks, which are normally created
423 * by udevd, but some scripts might need them before we start
424 * udevd. */
425 dev_setup(NULL);
426
427 /* Mark the root directory as shared in regards to mount
428 * propagation. The kernel defaults to "private", but we think
429 * it makes more sense to have a default of "shared" so that
430 * nspawn and the container tools work out of the box. If
431 * specific setups need other settings they can reset the
432 * propagation mode to private if needed. */
433 if (detect_container(NULL) <= 0)
434 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
435 log_warning("Failed to set up the root directory for shared mount propagation: %m");
436
437 /* Create a few directories we always want around */
438 mkdir_label("/run/systemd", 0755);
439 mkdir_label("/run/systemd/system", 0755);
440
441 return 0;
442 }