]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/mount-setup.c
TODO: uses for SO_REUSEPORT
[thirdparty/systemd.git] / src / core / mount-setup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mount.h>
23#include <errno.h>
24#include <sys/stat.h>
25#include <stdlib.h>
26#include <string.h>
27#include <libgen.h>
28#include <assert.h>
5c0532d1 29#include <unistd.h>
1829dc9d 30#include <ftw.h>
8e274523
LP
31
32#include "mount-setup.h"
5ba2dc25 33#include "dev-setup.h"
8e274523 34#include "log.h"
c9af1080
LP
35#include "macro.h"
36#include "util.h"
5275d3c1 37#include "label.h"
0c85a4f3
LP
38#include "set.h"
39#include "strv.h"
49e942b2 40#include "mkdir.h"
9eb977db 41#include "path-util.h"
48ac500b 42#include "missing.h"
c481f78b 43#include "virt.h"
34e5a31e 44#include "efivars.h"
8e274523 45
bef2733f
LP
46#ifndef TTY_GID
47#define TTY_GID 5
48#endif
49
6aa220e0
KS
/* Per-entry behaviour flags for MountPoint.mode; values may be OR-ed. */
typedef enum MountMode {
        MNT_NONE         = 0,       /* no special treatment */
        MNT_FATAL        = 1 << 0,  /* a failed mount is logged at error level and reported to the caller */
        MNT_IN_CONTAINER = 1 << 1,  /* also mount this entry when running inside a container */
} MountMode;
55
ca714c0e
LP
56typedef struct MountPoint {
57 const char *what;
58 const char *where;
59 const char *type;
60 const char *options;
61 unsigned long flags;
6aa220e0
KS
62 bool (*condition_fn)(void);
63 MountMode mode;
ca714c0e
LP
64} MountPoint;
65
/* The first three entries we might need before SELinux is up. The
 * fourth (securityfs) is needed by IMA to load a custom policy. The
 * fifth (smackfs) is part of the early set as well (presumably so
 * that Smack can be initialized -- TODO confirm). The other ones we
 * can delay until SELinux and IMA are loaded. */
ffbd2c4d 69#define N_EARLY_MOUNT 5
4ef31082 70
ca714c0e 71static const MountPoint mount_table[] = {
6aa220e0
KS
72 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
73 NULL, MNT_FATAL|MNT_IN_CONTAINER },
74 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
75 NULL, MNT_FATAL|MNT_IN_CONTAINER },
76 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
77 NULL, MNT_FATAL|MNT_IN_CONTAINER },
78 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
79 NULL, MNT_NONE },
ffbd2c4d
NC
80 { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
81 NULL, MNT_NONE },
6aa220e0
KS
82 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
83 NULL, MNT_FATAL|MNT_IN_CONTAINER },
84 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
85 NULL, MNT_IN_CONTAINER },
86 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
87 NULL, MNT_FATAL|MNT_IN_CONTAINER },
88 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
89 NULL, MNT_IN_CONTAINER },
90 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
91 NULL, MNT_IN_CONTAINER },
c06bf414
KS
92 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
93 NULL, MNT_NONE },
94#ifdef ENABLE_EFI
95 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
96 is_efi_boot, MNT_NONE },
97#endif
8e274523
LP
98};
99
949c6510 100/* These are API file systems that might be mounted by other software,
46ff0ed7 101 * we just list them here so that we know that we should ignore them */
949c6510 102
eaeb18db
LP
/* Stored as a sequence of NUL-terminated strings; the list ends with the
 * empty string formed by the literal's implicit terminator. */
static const char ignore_paths[] =
        "/sys/fs/selinux\0"     /* SELinux file system */
        "/selinux\0"            /* SELinux file system */
        "/dev/cgroup\0"         /* legacy cgroup mount point */
        "/cgroup\0"             /* legacy cgroup mount point */
        "/proc/bus/usb\0"       /* legacy kernel file system */
        "/proc/sys\0"           /* container bind mount */
        "/dev/console\0"        /* container bind mount */
        "/proc/kmsg\0";         /* container bind mount */
949c6510 116
dad08730
LP
117bool mount_point_is_api(const char *path) {
118 unsigned i;
119
120 /* Checks if this mount point is considered "API", and hence
121 * should be ignored */
122
ca714c0e 123 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
449ddb2d 124 if (path_equal(path, mount_table[i].where))
dad08730
LP
125 return true;
126
57f2a956
KS
127 return path_startswith(path, "/sys/fs/cgroup/");
128}
129
130bool mount_point_ignore(const char *path) {
eaeb18db 131 const char *i;
57f2a956 132
eaeb18db
LP
133 NULSTR_FOREACH(i, ignore_paths)
134 if (path_equal(path, i))
949c6510
LP
135 return true;
136
57f2a956 137 return false;
dad08730
LP
138}
139
4ef31082 140static int mount_one(const MountPoint *p, bool relabel) {
8e274523
LP
141 int r;
142
ca714c0e 143 assert(p);
8e274523 144
6aa220e0
KS
145 if (p->condition_fn && !p->condition_fn())
146 return 0;
147
51b4af2c 148 /* Relabel first, just in case */
4ef31082 149 if (relabel)
c9bc0764 150 label_fix(p->where, true, true);
51b4af2c 151
c481f78b
LP
152 r = path_is_mount_point(p->where, true);
153 if (r < 0)
8e274523
LP
154 return r;
155
156 if (r > 0)
51b4af2c 157 return 0;
8e274523 158
c481f78b 159 /* Skip securityfs in a container */
6aa220e0 160 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
c481f78b
LP
161 return 0;
162
a04f58d6
LP
163 /* The access mode here doesn't really matter too much, since
164 * the mounted file system will take precedence anyway. */
d2e54fae 165 mkdir_p_label(p->where, 0755);
a04f58d6 166
8e274523 167 log_debug("Mounting %s to %s of type %s with options %s.",
ca714c0e
LP
168 p->what,
169 p->where,
170 p->type,
171 strna(p->options));
172
173 if (mount(p->what,
174 p->where,
175 p->type,
176 p->flags,
177 p->options) < 0) {
6aa220e0
KS
178 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %s", p->where, strerror(errno));
179 return (p->mode & MNT_FATAL) ? -errno : 0;
8e274523
LP
180 }
181
51b4af2c 182 /* Relabel again, since we now mounted something fresh here */
4ef31082 183 if (relabel)
c9bc0764 184 label_fix(p->where, false, false);
5275d3c1 185
0c85a4f3 186 return 1;
8e274523
LP
187}
188
4ef31082
LP
189int mount_setup_early(void) {
190 unsigned i;
191 int r = 0;
192
193 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
194
195 /* Do a minimal mount of /proc and friends to enable the most
196 * basic stuff, such as SELinux */
197 for (i = 0; i < N_EARLY_MOUNT; i ++) {
198 int j;
199
200 j = mount_one(mount_table + i, false);
201 if (r == 0)
202 r = j;
203 }
204
205 return r;
206}
207
0c85a4f3 208int mount_cgroup_controllers(char ***join_controllers) {
2076ca54
LP
209 int r;
210 FILE *f;
20c03b7b 211 char buf[LINE_MAX];
0c85a4f3 212 Set *controllers;
2076ca54 213
670802d4 214 /* Mount all available cgroup controllers that are built into the kernel. */
2076ca54 215
0c85a4f3
LP
216 f = fopen("/proc/cgroups", "re");
217 if (!f) {
016e9849
LP
218 log_error("Failed to enumerate cgroup controllers: %m");
219 return 0;
220 }
2076ca54 221
0c85a4f3
LP
222 controllers = set_new(string_hash_func, string_compare_func);
223 if (!controllers) {
14212119 224 r = log_oom();
0c85a4f3
LP
225 goto finish;
226 }
227
2076ca54 228 /* Ignore the header line */
bab45044 229 (void) fgets(buf, sizeof(buf), f);
2076ca54
LP
230
231 for (;;) {
0c85a4f3
LP
232 char *controller;
233 int enabled = 0;
2076ca54 234
16f6682d 235 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2076ca54
LP
236
237 if (feof(f))
238 break;
239
240 log_error("Failed to parse /proc/cgroups.");
241 r = -EIO;
242 goto finish;
243 }
244
600a328f
LP
245 if (!enabled) {
246 free(controller);
247 continue;
248 }
249
0c85a4f3
LP
250 r = set_put(controllers, controller);
251 if (r < 0) {
252 log_error("Failed to add controller to set.");
2076ca54 253 free(controller);
0c85a4f3
LP
254 goto finish;
255 }
256 }
257
258 for (;;) {
259 MountPoint p;
260 char *controller, *where, *options;
261 char ***k = NULL;
262
263 controller = set_steal_first(controllers);
264 if (!controller)
265 break;
266
267 if (join_controllers)
268 for (k = join_controllers; *k; k++)
269 if (strv_find(*k, controller))
270 break;
271
272 if (k && *k) {
273 char **i, **j;
274
275 for (i = *k, j = *k; *i; i++) {
276
277 if (!streq(*i, controller)) {
278 char *t;
279
280 t = set_remove(controllers, *i);
281 if (!t) {
282 free(*i);
283 continue;
284 }
285 free(t);
286 }
287
288 *(j++) = *i;
289 }
290
291 *j = NULL;
292
293 options = strv_join(*k, ",");
294 if (!options) {
0c85a4f3 295 free(controller);
14212119 296 r = log_oom();
0c85a4f3
LP
297 goto finish;
298 }
299
300 } else {
301 options = controller;
302 controller = NULL;
303 }
304
305 where = strappend("/sys/fs/cgroup/", options);
306 if (!where) {
0c85a4f3 307 free(options);
14212119 308 r = log_oom();
2076ca54
LP
309 goto finish;
310 }
311
312 zero(p);
313 p.what = "cgroup";
314 p.where = where;
315 p.type = "cgroup";
0c85a4f3 316 p.options = options;
2076ca54 317 p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
2076ca54 318
4ef31082 319 r = mount_one(&p, true);
2076ca54
LP
320 free(controller);
321 free(where);
322
0c85a4f3
LP
323 if (r < 0) {
324 free(options);
2076ca54 325 goto finish;
0c85a4f3
LP
326 }
327
328 if (r > 0 && k && *k) {
329 char **i;
330
331 for (i = *k; *i; i++) {
332 char *t;
333
334 t = strappend("/sys/fs/cgroup/", *i);
335 if (!t) {
14212119 336 r = log_oom();
0c85a4f3
LP
337 free(options);
338 goto finish;
339 }
340
341 r = symlink(options, t);
342 free(t);
343
344 if (r < 0 && errno != EEXIST) {
345 log_error("Failed to create symlink: %m");
346 r = -errno;
347 free(options);
348 goto finish;
349 }
350 }
351 }
352
353 free(options);
2076ca54
LP
354 }
355
356 r = 0;
357
358finish:
0c85a4f3
LP
359 set_free_free(controllers);
360
2076ca54
LP
361 fclose(f);
362
363 return r;
364}
365
1829dc9d
LP
366static int nftw_cb(
367 const char *fpath,
368 const struct stat *sb,
369 int tflag,
370 struct FTW *ftwbuf) {
371
9fe117ea 372 /* No need to label /dev twice in a row... */
edb49778
LP
373 if (_unlikely_(ftwbuf->level == 0))
374 return FTW_CONTINUE;
375
c9bc0764 376 label_fix(fpath, false, false);
af65c248 377
edb49778 378 /* /run/initramfs is static data and big, no need to
af65c248 379 * dynamically relabel its contents at boot... */
edb49778
LP
380 if (_unlikely_(ftwbuf->level == 1 &&
381 tflag == FTW_D &&
382 streq(fpath, "/run/initramfs")))
383 return FTW_SKIP_SUBTREE;
9fe117ea 384
edb49778 385 return FTW_CONTINUE;
1829dc9d
LP
386};
387
0b3325e7 388int mount_setup(bool loaded_policy) {
5c0532d1 389
af65c248
LP
390 static const char relabel[] =
391 "/run/initramfs/root-fsck\0"
392 "/run/initramfs/shutdown\0";
393
8e274523 394 int r;
dad08730 395 unsigned i;
5ba2dc25 396 const char *j;
8e274523 397
4ef31082
LP
398 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
399 r = mount_one(mount_table + i, true);
400
401 if (r < 0)
8e274523 402 return r;
4ef31082 403 }
8e274523 404
f1d19aa4
LP
405 /* Nodes in devtmpfs and /run need to be manually updated for
406 * the appropriate labels, after mounting. The other virtual
407 * API file systems like /sys and /proc do not need that, they
408 * use the same label for all their files. */
0b3325e7
LP
409 if (loaded_policy) {
410 usec_t before_relabel, after_relabel;
411 char timespan[FORMAT_TIMESPAN_MAX];
412
413 before_relabel = now(CLOCK_MONOTONIC);
414
edb49778
LP
415 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
416 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
0b3325e7 417
af65c248
LP
418 /* Explicitly relabel these */
419 NULSTR_FOREACH(j, relabel)
c9bc0764 420 label_fix(j, true, false);
af65c248 421
0b3325e7
LP
422 after_relabel = now(CLOCK_MONOTONIC);
423
424 log_info("Relabelled /dev and /run in %s.",
425 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel));
3bbecb2f 426 }
1829dc9d 427
5c0532d1 428 /* Create a few default symlinks, which are normally created
f1d19aa4 429 * by udevd, but some scripts might need them before we start
5c0532d1 430 * udevd. */
01ed0e23 431 dev_setup(NULL);
5c0532d1 432
b3ac5f8c
LP
433 /* Mark the root directory as shared in regards to mount
434 * propagation. The kernel defaults to "private", but we think
435 * it makes more sense to have a default of "shared" so that
436 * nspawn and the container tools work out of the box. If
437 * specific setups need other settings they can reset the
438 * propagation mode to private if needed. */
c481f78b
LP
439 if (detect_container(NULL) <= 0)
440 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
441 log_warning("Failed to set up the root directory for shared mount propagation: %m");
b3ac5f8c 442
66e41181
LP
443 /* Create a few directories we always want around, Note that
444 * sd_booted() checks for /run/systemd/system, so this mkdir
445 * really needs to stay for good, otherwise software that
446 * copied sd-daemon.c into their sources will misdetect
447 * systemd. */
d2e54fae
KS
448 mkdir_label("/run/systemd", 0755);
449 mkdir_label("/run/systemd/system", 0755);
b925e726 450
0c85a4f3 451 return 0;
8e274523 452}