]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/mount-setup.c
cgroup: downgrade log messages when we cannot write to cgroup trees that are mounted...
[thirdparty/systemd.git] / src / core / mount-setup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mount.h>
23#include <errno.h>
24#include <sys/stat.h>
25#include <stdlib.h>
26#include <string.h>
27#include <libgen.h>
28#include <assert.h>
5c0532d1 29#include <unistd.h>
1829dc9d 30#include <ftw.h>
8e274523
LP
31
32#include "mount-setup.h"
5ba2dc25 33#include "dev-setup.h"
8e274523 34#include "log.h"
c9af1080
LP
35#include "macro.h"
36#include "util.h"
5275d3c1 37#include "label.h"
0c85a4f3
LP
38#include "set.h"
39#include "strv.h"
49e942b2 40#include "mkdir.h"
9eb977db 41#include "path-util.h"
48ac500b 42#include "missing.h"
c481f78b 43#include "virt.h"
34e5a31e 44#include "efivars.h"
8552b176 45#include "smack-util.h"
7f112f50 46#include "def.h"
bef2733f 47
6aa220e0
KS
/* Per-entry policy bits that tell mount_one() how to treat a mount point. */
typedef enum MountMode {
        /* No special treatment: failures are non-fatal, entry is skipped in containers */
        MNT_NONE         = 0,
        /* A failing mount() is logged as an error and reported to the caller */
        MNT_FATAL        = 1 << 0,
        /* The entry is mounted even when a container environment is detected */
        MNT_IN_CONTAINER = 1 << 1,
} MountMode;
53
ca714c0e
LP
54typedef struct MountPoint {
55 const char *what;
56 const char *where;
57 const char *type;
58 const char *options;
59 unsigned long flags;
6aa220e0
KS
60 bool (*condition_fn)(void);
61 MountMode mode;
ca714c0e
LP
62} MountPoint;
63
/* Number of leading mount_table entries that mount_setup_early()
 * mounts. The first three entries we might need before SELinux is
 * up. The fourth (securityfs) is needed by IMA to load a custom
 * policy. Everything after that can wait until SELinux and IMA are
 * loaded. When SMACK is enabled smackfs is needed early as well,
 * which makes it five. */
#ifndef HAVE_SMACK
#define N_EARLY_MOUNT 4
#else
#define N_EARLY_MOUNT 5
#endif
4ef31082 73
ca714c0e 74static const MountPoint mount_table[] = {
68d4c452
LP
75 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
76 NULL, MNT_FATAL|MNT_IN_CONTAINER },
77 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
78 NULL, MNT_FATAL|MNT_IN_CONTAINER },
79 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
80 NULL, MNT_FATAL|MNT_IN_CONTAINER },
81 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
82 NULL, MNT_NONE },
d407c940 83#ifdef HAVE_SMACK
68d4c452
LP
84 { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV,
85 mac_smack_use, MNT_FATAL },
86 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
87 mac_smack_use, MNT_FATAL },
d407c940 88#endif
68d4c452
LP
89 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
90 NULL, MNT_FATAL|MNT_IN_CONTAINER },
91 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
92 NULL, MNT_IN_CONTAINER },
d407c940 93#ifdef HAVE_SMACK
68d4c452
LP
94 { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
95 mac_smack_use, MNT_FATAL },
d407c940 96#endif
68d4c452
LP
97 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
98 NULL, MNT_FATAL|MNT_IN_CONTAINER },
99 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
100 NULL, MNT_FATAL|MNT_IN_CONTAINER },
101 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
102 NULL, MNT_IN_CONTAINER },
103 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
104 NULL, MNT_FATAL|MNT_IN_CONTAINER },
105 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
106 NULL, MNT_NONE },
c06bf414 107#ifdef ENABLE_EFI
68d4c452
LP
108 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
109 is_efi_boot, MNT_NONE },
c06bf414 110#endif
63cc4c31
DM
111#ifdef ENABLE_KDBUS
112 { "kdbusfs", "/sys/fs/kdbus", "kdbusfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
113 NULL, MNT_IN_CONTAINER },
114#endif
115};
116
/* API file systems that other software might mount. They are only
 * listed so that mount_point_ignore() can recognise them. The list
 * is a sequence of NUL-terminated strings, ended by an empty
 * string. */

static const char ignore_paths[] =
        /* SELinux file systems */
        "/sys/fs/selinux\0"
        "/selinux\0"

        /* Legacy cgroup mount points */
        "/dev/cgroup\0"
        "/cgroup\0"

        /* Legacy kernel file system */
        "/proc/bus/usb\0"

        /* Container bind mounts */
        "/proc/sys\0"
        "/dev/console\0"
        "/proc/kmsg\0";
949c6510 133
dad08730
LP
134bool mount_point_is_api(const char *path) {
135 unsigned i;
136
137 /* Checks if this mount point is considered "API", and hence
138 * should be ignored */
139
ca714c0e 140 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
449ddb2d 141 if (path_equal(path, mount_table[i].where))
dad08730
LP
142 return true;
143
57f2a956
KS
144 return path_startswith(path, "/sys/fs/cgroup/");
145}
146
147bool mount_point_ignore(const char *path) {
eaeb18db 148 const char *i;
57f2a956 149
eaeb18db
LP
150 NULSTR_FOREACH(i, ignore_paths)
151 if (path_equal(path, i))
949c6510
LP
152 return true;
153
57f2a956 154 return false;
dad08730
LP
155}
156
4ef31082 157static int mount_one(const MountPoint *p, bool relabel) {
8e274523
LP
158 int r;
159
ca714c0e 160 assert(p);
8e274523 161
6aa220e0
KS
162 if (p->condition_fn && !p->condition_fn())
163 return 0;
164
51b4af2c 165 /* Relabel first, just in case */
4ef31082 166 if (relabel)
c9bc0764 167 label_fix(p->where, true, true);
51b4af2c 168
c481f78b
LP
169 r = path_is_mount_point(p->where, true);
170 if (r < 0)
8e274523
LP
171 return r;
172
173 if (r > 0)
51b4af2c 174 return 0;
8e274523 175
c481f78b 176 /* Skip securityfs in a container */
6aa220e0 177 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
c481f78b
LP
178 return 0;
179
a04f58d6
LP
180 /* The access mode here doesn't really matter too much, since
181 * the mounted file system will take precedence anyway. */
c4bfd169
LP
182 if (relabel)
183 mkdir_p_label(p->where, 0755);
184 else
185 mkdir_p(p->where, 0755);
a04f58d6 186
8e274523 187 log_debug("Mounting %s to %s of type %s with options %s.",
ca714c0e
LP
188 p->what,
189 p->where,
190 p->type,
191 strna(p->options));
192
193 if (mount(p->what,
194 p->where,
195 p->type,
196 p->flags,
197 p->options) < 0) {
99a17ada 198 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s at %s: %m", p->type, p->where);
6aa220e0 199 return (p->mode & MNT_FATAL) ? -errno : 0;
8e274523
LP
200 }
201
51b4af2c 202 /* Relabel again, since we now mounted something fresh here */
4ef31082 203 if (relabel)
c9bc0764 204 label_fix(p->where, false, false);
5275d3c1 205
0c85a4f3 206 return 1;
8e274523
LP
207}
208
4ef31082
LP
209int mount_setup_early(void) {
210 unsigned i;
211 int r = 0;
212
213 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
214
215 /* Do a minimal mount of /proc and friends to enable the most
216 * basic stuff, such as SELinux */
217 for (i = 0; i < N_EARLY_MOUNT; i ++) {
218 int j;
219
220 j = mount_one(mount_table + i, false);
221 if (r == 0)
222 r = j;
223 }
224
225 return r;
226}
227
0c85a4f3 228int mount_cgroup_controllers(char ***join_controllers) {
a6b26d90
ZJS
229 _cleanup_set_free_free_ Set *controllers = NULL;
230 _cleanup_fclose_ FILE *f;
a641dcd9
LP
231 char buf[LINE_MAX];
232 int r;
2076ca54 233
670802d4 234 /* Mount all available cgroup controllers that are built into the kernel. */
2076ca54 235
0c85a4f3
LP
236 f = fopen("/proc/cgroups", "re");
237 if (!f) {
56f64d95 238 log_error_errno(errno, "Failed to enumerate cgroup controllers: %m");
016e9849
LP
239 return 0;
240 }
2076ca54 241
d5099efc 242 controllers = set_new(&string_hash_ops);
a6b26d90
ZJS
243 if (!controllers)
244 return log_oom();
0c85a4f3 245
2076ca54 246 /* Ignore the header line */
bab45044 247 (void) fgets(buf, sizeof(buf), f);
2076ca54
LP
248
249 for (;;) {
0c85a4f3
LP
250 char *controller;
251 int enabled = 0;
2076ca54 252
16f6682d 253 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2076ca54
LP
254
255 if (feof(f))
256 break;
257
258 log_error("Failed to parse /proc/cgroups.");
a6b26d90 259 return -EIO;
2076ca54
LP
260 }
261
600a328f
LP
262 if (!enabled) {
263 free(controller);
264 continue;
265 }
266
ef42202a 267 r = set_consume(controllers, controller);
0c85a4f3
LP
268 if (r < 0) {
269 log_error("Failed to add controller to set.");
a6b26d90 270 return r;
0c85a4f3
LP
271 }
272 }
273
274 for (;;) {
a641dcd9 275 _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
a6b26d90
ZJS
276 MountPoint p = {
277 .what = "cgroup",
278 .type = "cgroup",
279 .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
280 .mode = MNT_IN_CONTAINER,
281 };
0c85a4f3
LP
282 char ***k = NULL;
283
284 controller = set_steal_first(controllers);
285 if (!controller)
286 break;
287
288 if (join_controllers)
289 for (k = join_controllers; *k; k++)
290 if (strv_find(*k, controller))
291 break;
292
293 if (k && *k) {
294 char **i, **j;
295
296 for (i = *k, j = *k; *i; i++) {
297
298 if (!streq(*i, controller)) {
a641dcd9 299 _cleanup_free_ char *t;
0c85a4f3
LP
300
301 t = set_remove(controllers, *i);
302 if (!t) {
303 free(*i);
304 continue;
305 }
0c85a4f3
LP
306 }
307
308 *(j++) = *i;
309 }
310
311 *j = NULL;
312
313 options = strv_join(*k, ",");
a6b26d90
ZJS
314 if (!options)
315 return log_oom();
0c85a4f3
LP
316 } else {
317 options = controller;
318 controller = NULL;
319 }
320
a641dcd9
LP
321 where = strappend("/sys/fs/cgroup/", options);
322 if (!where)
323 return log_oom();
324
325 p.where = where;
0c85a4f3 326 p.options = options;
2076ca54 327
4ef31082 328 r = mount_one(&p, true);
a6b26d90
ZJS
329 if (r < 0)
330 return r;
0c85a4f3
LP
331
332 if (r > 0 && k && *k) {
333 char **i;
334
335 for (i = *k; *i; i++) {
a641dcd9
LP
336 _cleanup_free_ char *t = NULL;
337
338 t = strappend("/sys/fs/cgroup/", *i);
339 if (!t)
340 return log_oom();
0c85a4f3
LP
341
342 r = symlink(options, t);
4a62c710
MS
343 if (r < 0 && errno != EEXIST)
344 return log_error_errno(errno, "Failed to create symlink %s: %m", t);
0c85a4f3
LP
345 }
346 }
2076ca54
LP
347 }
348
679142ce
LP
349 /* Now that we mounted everything, let's make the tmpfs the
350 * cgroup file systems are mounted into read-only. */
351 mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
352
a6b26d90 353 return 0;
2076ca54
LP
354}
355
#if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
/* nftw() callback used by mount_setup() to fix the label of every
 * entry below the top of the walked tree. Returns FTW_SKIP_SUBTREE
 * for the directory /run/initramfs (after fixing its own label) and
 * FTW_CONTINUE for everything else. */
static int nftw_cb(
                const char *fpath,
                const struct stat *sb,
                int tflag,
                struct FTW *ftwbuf) {

        /* No need to label /dev twice in a row... */
        if (_unlikely_(ftwbuf->level == 0))
                return FTW_CONTINUE;

        label_fix(fpath, false, false);

        /* /run/initramfs is static data and big, no need to
         * dynamically relabel its contents at boot... */
        if (_unlikely_(ftwbuf->level == 1 &&
                      tflag == FTW_D &&
                      streq(fpath, "/run/initramfs")))
                return FTW_SKIP_SUBTREE;

        return FTW_CONTINUE;
}
#endif
1829dc9d 379
0b3325e7 380int mount_setup(bool loaded_policy) {
dad08730 381 unsigned i;
68d4c452 382 int r = 0;
8e274523 383
4ef31082 384 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
68d4c452 385 int j;
4ef31082 386
68d4c452
LP
387 j = mount_one(mount_table + i, loaded_policy);
388 if (r == 0)
389 r = j;
4ef31082 390 }
8e274523 391
68d4c452
LP
392 if (r < 0)
393 return r;
394
0fff82e5 395#if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
f1d19aa4
LP
396 /* Nodes in devtmpfs and /run need to be manually updated for
397 * the appropriate labels, after mounting. The other virtual
398 * API file systems like /sys and /proc do not need that, they
399 * use the same label for all their files. */
0b3325e7
LP
400 if (loaded_policy) {
401 usec_t before_relabel, after_relabel;
402 char timespan[FORMAT_TIMESPAN_MAX];
403
404 before_relabel = now(CLOCK_MONOTONIC);
405
edb49778
LP
406 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
407 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
0b3325e7
LP
408
409 after_relabel = now(CLOCK_MONOTONIC);
410
411 log_info("Relabelled /dev and /run in %s.",
2fa4092c 412 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
3bbecb2f 413 }
0fff82e5 414#endif
1829dc9d 415
5c0532d1 416 /* Create a few default symlinks, which are normally created
f1d19aa4 417 * by udevd, but some scripts might need them before we start
5c0532d1 418 * udevd. */
01ed0e23 419 dev_setup(NULL);
5c0532d1 420
b3ac5f8c
LP
421 /* Mark the root directory as shared in regards to mount
422 * propagation. The kernel defaults to "private", but we think
423 * it makes more sense to have a default of "shared" so that
424 * nspawn and the container tools work out of the box. If
425 * specific setups need other settings they can reset the
426 * propagation mode to private if needed. */
c481f78b
LP
427 if (detect_container(NULL) <= 0)
428 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
56f64d95 429 log_warning_errno(errno, "Failed to set up the root directory for shared mount propagation: %m");
b3ac5f8c 430
66e41181
LP
431 /* Create a few directories we always want around, Note that
432 * sd_booted() checks for /run/systemd/system, so this mkdir
433 * really needs to stay for good, otherwise software that
434 * copied sd-daemon.c into their sources will misdetect
435 * systemd. */
d2e54fae
KS
436 mkdir_label("/run/systemd", 0755);
437 mkdir_label("/run/systemd/system", 0755);
c17ec25e 438 mkdir_label("/run/systemd/inaccessible", 0000);
b925e726 439
0c85a4f3 440 return 0;
8e274523 441}