]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/mount-setup.c
systemd,nspawn: use extended attributes to store metadata
[thirdparty/systemd.git] / src / core / mount-setup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mount.h>
23#include <errno.h>
24#include <sys/stat.h>
25#include <stdlib.h>
26#include <string.h>
27#include <libgen.h>
28#include <assert.h>
5c0532d1 29#include <unistd.h>
1829dc9d 30#include <ftw.h>
8e274523
LP
31
32#include "mount-setup.h"
5ba2dc25 33#include "dev-setup.h"
8e274523 34#include "log.h"
c9af1080
LP
35#include "macro.h"
36#include "util.h"
5275d3c1 37#include "label.h"
0c85a4f3
LP
38#include "set.h"
39#include "strv.h"
49e942b2 40#include "mkdir.h"
9eb977db 41#include "path-util.h"
48ac500b 42#include "missing.h"
c481f78b 43#include "virt.h"
34e5a31e 44#include "efivars.h"
8e274523 45
bef2733f
LP
/* Group id handed to devpts through its "gid=" mount option in the
 * mount table. A build may predefine TTY_GID to override the default. */
#if !defined(TTY_GID)
#  define TTY_GID 5
#endif
49
6aa220e0
KS
/* Per-entry behaviour flags, combined as a bit mask in MountPoint.mode. */
typedef enum MountMode {
        /* No special handling. */
        MNT_NONE         = 0x00,
        /* A failing mount() is logged as an error and reported to the caller. */
        MNT_FATAL        = 0x01,
        /* The entry is also mounted when running inside a container. */
        MNT_IN_CONTAINER = 0x02,
} MountMode;
55
ca714c0e
LP
56typedef struct MountPoint {
57 const char *what;
58 const char *where;
59 const char *type;
60 const char *options;
61 unsigned long flags;
6aa220e0
KS
62 bool (*condition_fn)(void);
63 MountMode mode;
ca714c0e
LP
64} MountPoint;
65
/* Number of leading mount_table entries that mount_setup_early()
 * mounts. The first three entries we might need before SELinux is up.
 * The fourth (securityfs) is needed by IMA to load a custom policy.
 * The fifth (smackfs) is mounted early as well, presumably so that
 * SMACK can be set up at the same stage. All remaining entries can be
 * delayed until SELinux and IMA are loaded. */
#define N_EARLY_MOUNT 5
4ef31082 70
a383724e
ZJS
/* Suffix appended to the mount options of the name=systemd cgroup
 * hierarchy: requests extended attribute support when the build has
 * HAVE_XATTR, and is empty otherwise. */
#if !defined(HAVE_XATTR)
#  define FS_XATTR_OPT ""
#else
#  define FS_XATTR_OPT ",xattr"
#endif
76
ca714c0e 77static const MountPoint mount_table[] = {
6aa220e0
KS
78 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
79 NULL, MNT_FATAL|MNT_IN_CONTAINER },
80 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
81 NULL, MNT_FATAL|MNT_IN_CONTAINER },
82 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
83 NULL, MNT_FATAL|MNT_IN_CONTAINER },
84 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
85 NULL, MNT_NONE },
ffbd2c4d
NC
86 { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
87 NULL, MNT_NONE },
6aa220e0
KS
88 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
89 NULL, MNT_FATAL|MNT_IN_CONTAINER },
90 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
91 NULL, MNT_IN_CONTAINER },
92 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
93 NULL, MNT_FATAL|MNT_IN_CONTAINER },
94 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
95 NULL, MNT_IN_CONTAINER },
a383724e 96 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd" FS_XATTR_OPT, MS_NOSUID|MS_NOEXEC|MS_NODEV,
6aa220e0 97 NULL, MNT_IN_CONTAINER },
c06bf414
KS
98 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
99 NULL, MNT_NONE },
100#ifdef ENABLE_EFI
101 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
102 is_efi_boot, MNT_NONE },
103#endif
8e274523
LP
104};
105
949c6510 106/* These are API file systems that might be mounted by other software,
46ff0ed7 107 * we just list them here so that we know that we should ignore them */
949c6510 108
eaeb18db
LP
109static const char ignore_paths[] =
110 /* SELinux file systems */
111 "/sys/fs/selinux\0"
112 "/selinux\0"
113 /* Legacy cgroup mount points */
114 "/dev/cgroup\0"
115 "/cgroup\0"
116 /* Legacy kernel file system */
117 "/proc/bus/usb\0"
118 /* Container bind mounts */
119 "/proc/sys\0"
120 "/dev/console\0"
c481f78b 121 "/proc/kmsg\0";
949c6510 122
dad08730
LP
123bool mount_point_is_api(const char *path) {
124 unsigned i;
125
126 /* Checks if this mount point is considered "API", and hence
127 * should be ignored */
128
ca714c0e 129 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
449ddb2d 130 if (path_equal(path, mount_table[i].where))
dad08730
LP
131 return true;
132
57f2a956
KS
133 return path_startswith(path, "/sys/fs/cgroup/");
134}
135
136bool mount_point_ignore(const char *path) {
eaeb18db 137 const char *i;
57f2a956 138
eaeb18db
LP
139 NULSTR_FOREACH(i, ignore_paths)
140 if (path_equal(path, i))
949c6510
LP
141 return true;
142
57f2a956 143 return false;
dad08730
LP
144}
145
4ef31082 146static int mount_one(const MountPoint *p, bool relabel) {
8e274523
LP
147 int r;
148
ca714c0e 149 assert(p);
8e274523 150
6aa220e0
KS
151 if (p->condition_fn && !p->condition_fn())
152 return 0;
153
51b4af2c 154 /* Relabel first, just in case */
4ef31082 155 if (relabel)
c9bc0764 156 label_fix(p->where, true, true);
51b4af2c 157
c481f78b
LP
158 r = path_is_mount_point(p->where, true);
159 if (r < 0)
8e274523
LP
160 return r;
161
162 if (r > 0)
51b4af2c 163 return 0;
8e274523 164
c481f78b 165 /* Skip securityfs in a container */
6aa220e0 166 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
c481f78b
LP
167 return 0;
168
a04f58d6
LP
169 /* The access mode here doesn't really matter too much, since
170 * the mounted file system will take precedence anyway. */
d2e54fae 171 mkdir_p_label(p->where, 0755);
a04f58d6 172
8e274523 173 log_debug("Mounting %s to %s of type %s with options %s.",
ca714c0e
LP
174 p->what,
175 p->where,
176 p->type,
177 strna(p->options));
178
179 if (mount(p->what,
180 p->where,
181 p->type,
182 p->flags,
183 p->options) < 0) {
6aa220e0
KS
184 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %s", p->where, strerror(errno));
185 return (p->mode & MNT_FATAL) ? -errno : 0;
8e274523
LP
186 }
187
51b4af2c 188 /* Relabel again, since we now mounted something fresh here */
4ef31082 189 if (relabel)
c9bc0764 190 label_fix(p->where, false, false);
5275d3c1 191
0c85a4f3 192 return 1;
8e274523
LP
193}
194
4ef31082
LP
195int mount_setup_early(void) {
196 unsigned i;
197 int r = 0;
198
199 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
200
201 /* Do a minimal mount of /proc and friends to enable the most
202 * basic stuff, such as SELinux */
203 for (i = 0; i < N_EARLY_MOUNT; i ++) {
204 int j;
205
206 j = mount_one(mount_table + i, false);
207 if (r == 0)
208 r = j;
209 }
210
211 return r;
212}
213
0c85a4f3 214int mount_cgroup_controllers(char ***join_controllers) {
2076ca54
LP
215 int r;
216 FILE *f;
20c03b7b 217 char buf[LINE_MAX];
0c85a4f3 218 Set *controllers;
2076ca54 219
670802d4 220 /* Mount all available cgroup controllers that are built into the kernel. */
2076ca54 221
0c85a4f3
LP
222 f = fopen("/proc/cgroups", "re");
223 if (!f) {
016e9849
LP
224 log_error("Failed to enumerate cgroup controllers: %m");
225 return 0;
226 }
2076ca54 227
0c85a4f3
LP
228 controllers = set_new(string_hash_func, string_compare_func);
229 if (!controllers) {
14212119 230 r = log_oom();
0c85a4f3
LP
231 goto finish;
232 }
233
2076ca54 234 /* Ignore the header line */
bab45044 235 (void) fgets(buf, sizeof(buf), f);
2076ca54
LP
236
237 for (;;) {
0c85a4f3
LP
238 char *controller;
239 int enabled = 0;
2076ca54 240
16f6682d 241 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2076ca54
LP
242
243 if (feof(f))
244 break;
245
246 log_error("Failed to parse /proc/cgroups.");
247 r = -EIO;
248 goto finish;
249 }
250
600a328f
LP
251 if (!enabled) {
252 free(controller);
253 continue;
254 }
255
0c85a4f3
LP
256 r = set_put(controllers, controller);
257 if (r < 0) {
258 log_error("Failed to add controller to set.");
2076ca54 259 free(controller);
0c85a4f3
LP
260 goto finish;
261 }
262 }
263
264 for (;;) {
265 MountPoint p;
266 char *controller, *where, *options;
267 char ***k = NULL;
268
269 controller = set_steal_first(controllers);
270 if (!controller)
271 break;
272
273 if (join_controllers)
274 for (k = join_controllers; *k; k++)
275 if (strv_find(*k, controller))
276 break;
277
278 if (k && *k) {
279 char **i, **j;
280
281 for (i = *k, j = *k; *i; i++) {
282
283 if (!streq(*i, controller)) {
284 char *t;
285
286 t = set_remove(controllers, *i);
287 if (!t) {
288 free(*i);
289 continue;
290 }
291 free(t);
292 }
293
294 *(j++) = *i;
295 }
296
297 *j = NULL;
298
299 options = strv_join(*k, ",");
300 if (!options) {
0c85a4f3 301 free(controller);
14212119 302 r = log_oom();
0c85a4f3
LP
303 goto finish;
304 }
305
306 } else {
307 options = controller;
308 controller = NULL;
309 }
310
311 where = strappend("/sys/fs/cgroup/", options);
312 if (!where) {
0c85a4f3 313 free(options);
14212119 314 r = log_oom();
2076ca54
LP
315 goto finish;
316 }
317
318 zero(p);
319 p.what = "cgroup";
320 p.where = where;
321 p.type = "cgroup";
0c85a4f3 322 p.options = options;
2076ca54 323 p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
3cbb76ee 324 p.mode = MNT_IN_CONTAINER;
2076ca54 325
4ef31082 326 r = mount_one(&p, true);
2076ca54
LP
327 free(controller);
328 free(where);
329
0c85a4f3
LP
330 if (r < 0) {
331 free(options);
2076ca54 332 goto finish;
0c85a4f3
LP
333 }
334
335 if (r > 0 && k && *k) {
336 char **i;
337
338 for (i = *k; *i; i++) {
339 char *t;
340
341 t = strappend("/sys/fs/cgroup/", *i);
342 if (!t) {
14212119 343 r = log_oom();
0c85a4f3
LP
344 free(options);
345 goto finish;
346 }
347
348 r = symlink(options, t);
349 free(t);
350
351 if (r < 0 && errno != EEXIST) {
352 log_error("Failed to create symlink: %m");
353 r = -errno;
354 free(options);
355 goto finish;
356 }
357 }
358 }
359
360 free(options);
2076ca54
LP
361 }
362
363 r = 0;
364
365finish:
0c85a4f3
LP
366 set_free_free(controllers);
367
2076ca54
LP
368 fclose(f);
369
370 return r;
371}
372
1829dc9d
LP
373static int nftw_cb(
374 const char *fpath,
375 const struct stat *sb,
376 int tflag,
377 struct FTW *ftwbuf) {
378
9fe117ea 379 /* No need to label /dev twice in a row... */
edb49778
LP
380 if (_unlikely_(ftwbuf->level == 0))
381 return FTW_CONTINUE;
382
c9bc0764 383 label_fix(fpath, false, false);
af65c248 384
edb49778 385 /* /run/initramfs is static data and big, no need to
af65c248 386 * dynamically relabel its contents at boot... */
edb49778
LP
387 if (_unlikely_(ftwbuf->level == 1 &&
388 tflag == FTW_D &&
389 streq(fpath, "/run/initramfs")))
390 return FTW_SKIP_SUBTREE;
9fe117ea 391
edb49778 392 return FTW_CONTINUE;
1829dc9d
LP
393};
394
0b3325e7 395int mount_setup(bool loaded_policy) {
5c0532d1 396
af65c248
LP
397 static const char relabel[] =
398 "/run/initramfs/root-fsck\0"
399 "/run/initramfs/shutdown\0";
400
8e274523 401 int r;
dad08730 402 unsigned i;
5ba2dc25 403 const char *j;
8e274523 404
4ef31082
LP
405 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
406 r = mount_one(mount_table + i, true);
407
408 if (r < 0)
8e274523 409 return r;
4ef31082 410 }
8e274523 411
f1d19aa4
LP
412 /* Nodes in devtmpfs and /run need to be manually updated for
413 * the appropriate labels, after mounting. The other virtual
414 * API file systems like /sys and /proc do not need that, they
415 * use the same label for all their files. */
0b3325e7
LP
416 if (loaded_policy) {
417 usec_t before_relabel, after_relabel;
418 char timespan[FORMAT_TIMESPAN_MAX];
419
420 before_relabel = now(CLOCK_MONOTONIC);
421
edb49778
LP
422 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
423 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
0b3325e7 424
af65c248
LP
425 /* Explicitly relabel these */
426 NULSTR_FOREACH(j, relabel)
c9bc0764 427 label_fix(j, true, false);
af65c248 428
0b3325e7
LP
429 after_relabel = now(CLOCK_MONOTONIC);
430
431 log_info("Relabelled /dev and /run in %s.",
2fa4092c 432 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
3bbecb2f 433 }
1829dc9d 434
5c0532d1 435 /* Create a few default symlinks, which are normally created
f1d19aa4 436 * by udevd, but some scripts might need them before we start
5c0532d1 437 * udevd. */
01ed0e23 438 dev_setup(NULL);
5c0532d1 439
b3ac5f8c
LP
440 /* Mark the root directory as shared in regards to mount
441 * propagation. The kernel defaults to "private", but we think
442 * it makes more sense to have a default of "shared" so that
443 * nspawn and the container tools work out of the box. If
444 * specific setups need other settings they can reset the
445 * propagation mode to private if needed. */
c481f78b
LP
446 if (detect_container(NULL) <= 0)
447 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
448 log_warning("Failed to set up the root directory for shared mount propagation: %m");
b3ac5f8c 449
66e41181
LP
450 /* Create a few directories we always want around, Note that
451 * sd_booted() checks for /run/systemd/system, so this mkdir
452 * really needs to stay for good, otherwise software that
453 * copied sd-daemon.c into their sources will misdetect
454 * systemd. */
d2e54fae
KS
455 mkdir_label("/run/systemd", 0755);
456 mkdir_label("/run/systemd/system", 0755);
c17ec25e 457 mkdir_label("/run/systemd/inaccessible", 0000);
b925e726 458
0c85a4f3 459 return 0;
8e274523 460}