]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/mount-setup.c
shutdown: loop - convert to enumerate match
[thirdparty/systemd.git] / src / core / mount-setup.c
CommitLineData
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22#include <sys/mount.h>
23#include <errno.h>
24#include <sys/stat.h>
25#include <stdlib.h>
26#include <string.h>
27#include <libgen.h>
28#include <assert.h>
5c0532d1 29#include <unistd.h>
1829dc9d 30#include <ftw.h>
8e274523
LP
31
32#include "mount-setup.h"
5ba2dc25 33#include "dev-setup.h"
8e274523 34#include "log.h"
c9af1080
LP
35#include "macro.h"
36#include "util.h"
5275d3c1 37#include "label.h"
0c85a4f3
LP
38#include "set.h"
39#include "strv.h"
49e942b2 40#include "mkdir.h"
9eb977db 41#include "path-util.h"
48ac500b 42#include "missing.h"
c481f78b 43#include "virt.h"
8e274523 44
bef2733f
LP
/* Group id spliced into the "gid=" option of the devpts mount. Only
 * defined here if nothing else has defined it already. */
#ifndef TTY_GID
#define TTY_GID 5
#endif
48
6aa220e0
KS
/* Per-entry behaviour flags of a mount table entry. The non-zero
 * values are distinct bits and may be OR-ed together. */
typedef enum MountMode {
        /* No special treatment */
        MNT_NONE         = 0,

        /* A failing mount() is logged as an error and reported to the
         * caller instead of being silently skipped */
        MNT_FATAL        = 1 << 0,

        /* The entry is mounted even when a container is detected */
        MNT_IN_CONTAINER = 1 << 1,
} MountMode;
54
ca714c0e
LP
55typedef struct MountPoint {
56 const char *what;
57 const char *where;
58 const char *type;
59 const char *options;
60 unsigned long flags;
6aa220e0
KS
61 bool (*condition_fn)(void);
62 MountMode mode;
ca714c0e
LP
63} MountPoint;
64
4ef31082 65/* The first three entries we might need before SELinux is up. The
160481f6
RS
66 * fourth (securityfs) is needed by IMA to load a custom policy. The
67 * other ones we can delay until SELinux and IMA are loaded. */
68#define N_EARLY_MOUNT 4
4ef31082 69
ca714c0e 70static const MountPoint mount_table[] = {
6aa220e0
KS
71 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
72 NULL, MNT_FATAL|MNT_IN_CONTAINER },
73 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
74 NULL, MNT_FATAL|MNT_IN_CONTAINER },
75 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
76 NULL, MNT_FATAL|MNT_IN_CONTAINER },
77 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
78 NULL, MNT_NONE },
79 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
80 is_efiboot, MNT_NONE },
81 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
82 NULL, MNT_FATAL|MNT_IN_CONTAINER },
83 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
84 NULL, MNT_IN_CONTAINER },
85 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
86 NULL, MNT_FATAL|MNT_IN_CONTAINER },
87 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
88 NULL, MNT_IN_CONTAINER },
89 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
90 NULL, MNT_IN_CONTAINER },
8e274523
LP
91};
92
/* These are API file systems that might be mounted by other software,
 * we just list them here so that we know that we should ignore them.
 *
 * The list is one string with every path terminated by an explicit
 * NUL; the terminator the compiler appends leaves an empty string at
 * the end. */
static const char ignore_paths[] =
        /* SELinux file systems */
        "/sys/fs/selinux\0"
        "/selinux\0"

        /* Legacy cgroup mount points */
        "/dev/cgroup\0"
        "/cgroup\0"

        /* Legacy kernel file system */
        "/proc/bus/usb\0"

        /* Container bind mounts */
        "/proc/sys\0"
        "/dev/console\0"
        "/proc/kmsg\0";
949c6510 109
dad08730
LP
110bool mount_point_is_api(const char *path) {
111 unsigned i;
112
113 /* Checks if this mount point is considered "API", and hence
114 * should be ignored */
115
ca714c0e 116 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
449ddb2d 117 if (path_equal(path, mount_table[i].where))
dad08730
LP
118 return true;
119
57f2a956
KS
120 return path_startswith(path, "/sys/fs/cgroup/");
121}
122
123bool mount_point_ignore(const char *path) {
eaeb18db 124 const char *i;
57f2a956 125
eaeb18db
LP
126 NULSTR_FOREACH(i, ignore_paths)
127 if (path_equal(path, i))
949c6510
LP
128 return true;
129
57f2a956 130 return false;
dad08730
LP
131}
132
4ef31082 133static int mount_one(const MountPoint *p, bool relabel) {
8e274523
LP
134 int r;
135
ca714c0e 136 assert(p);
8e274523 137
6aa220e0
KS
138 if (p->condition_fn && !p->condition_fn())
139 return 0;
140
51b4af2c 141 /* Relabel first, just in case */
4ef31082 142 if (relabel)
c9bc0764 143 label_fix(p->where, true, true);
51b4af2c 144
c481f78b
LP
145 r = path_is_mount_point(p->where, true);
146 if (r < 0)
8e274523
LP
147 return r;
148
149 if (r > 0)
51b4af2c 150 return 0;
8e274523 151
c481f78b 152 /* Skip securityfs in a container */
6aa220e0 153 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
c481f78b
LP
154 return 0;
155
a04f58d6
LP
156 /* The access mode here doesn't really matter too much, since
157 * the mounted file system will take precedence anyway. */
d2e54fae 158 mkdir_p_label(p->where, 0755);
a04f58d6 159
8e274523 160 log_debug("Mounting %s to %s of type %s with options %s.",
ca714c0e
LP
161 p->what,
162 p->where,
163 p->type,
164 strna(p->options));
165
166 if (mount(p->what,
167 p->where,
168 p->type,
169 p->flags,
170 p->options) < 0) {
6aa220e0
KS
171 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %s", p->where, strerror(errno));
172 return (p->mode & MNT_FATAL) ? -errno : 0;
8e274523
LP
173 }
174
51b4af2c 175 /* Relabel again, since we now mounted something fresh here */
4ef31082 176 if (relabel)
c9bc0764 177 label_fix(p->where, false, false);
5275d3c1 178
0c85a4f3 179 return 1;
8e274523
LP
180}
181
4ef31082
LP
182int mount_setup_early(void) {
183 unsigned i;
184 int r = 0;
185
186 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
187
188 /* Do a minimal mount of /proc and friends to enable the most
189 * basic stuff, such as SELinux */
190 for (i = 0; i < N_EARLY_MOUNT; i ++) {
191 int j;
192
193 j = mount_one(mount_table + i, false);
194 if (r == 0)
195 r = j;
196 }
197
198 return r;
199}
200
0c85a4f3 201int mount_cgroup_controllers(char ***join_controllers) {
2076ca54
LP
202 int r;
203 FILE *f;
20c03b7b 204 char buf[LINE_MAX];
0c85a4f3 205 Set *controllers;
2076ca54 206
670802d4 207 /* Mount all available cgroup controllers that are built into the kernel. */
2076ca54 208
0c85a4f3
LP
209 f = fopen("/proc/cgroups", "re");
210 if (!f) {
016e9849
LP
211 log_error("Failed to enumerate cgroup controllers: %m");
212 return 0;
213 }
2076ca54 214
0c85a4f3
LP
215 controllers = set_new(string_hash_func, string_compare_func);
216 if (!controllers) {
14212119 217 r = log_oom();
0c85a4f3
LP
218 goto finish;
219 }
220
2076ca54 221 /* Ignore the header line */
bab45044 222 (void) fgets(buf, sizeof(buf), f);
2076ca54
LP
223
224 for (;;) {
0c85a4f3
LP
225 char *controller;
226 int enabled = 0;
2076ca54 227
16f6682d 228 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2076ca54
LP
229
230 if (feof(f))
231 break;
232
233 log_error("Failed to parse /proc/cgroups.");
234 r = -EIO;
235 goto finish;
236 }
237
600a328f
LP
238 if (!enabled) {
239 free(controller);
240 continue;
241 }
242
0c85a4f3
LP
243 r = set_put(controllers, controller);
244 if (r < 0) {
245 log_error("Failed to add controller to set.");
2076ca54 246 free(controller);
0c85a4f3
LP
247 goto finish;
248 }
249 }
250
251 for (;;) {
252 MountPoint p;
253 char *controller, *where, *options;
254 char ***k = NULL;
255
256 controller = set_steal_first(controllers);
257 if (!controller)
258 break;
259
260 if (join_controllers)
261 for (k = join_controllers; *k; k++)
262 if (strv_find(*k, controller))
263 break;
264
265 if (k && *k) {
266 char **i, **j;
267
268 for (i = *k, j = *k; *i; i++) {
269
270 if (!streq(*i, controller)) {
271 char *t;
272
273 t = set_remove(controllers, *i);
274 if (!t) {
275 free(*i);
276 continue;
277 }
278 free(t);
279 }
280
281 *(j++) = *i;
282 }
283
284 *j = NULL;
285
286 options = strv_join(*k, ",");
287 if (!options) {
0c85a4f3 288 free(controller);
14212119 289 r = log_oom();
0c85a4f3
LP
290 goto finish;
291 }
292
293 } else {
294 options = controller;
295 controller = NULL;
296 }
297
298 where = strappend("/sys/fs/cgroup/", options);
299 if (!where) {
0c85a4f3 300 free(options);
14212119 301 r = log_oom();
2076ca54
LP
302 goto finish;
303 }
304
305 zero(p);
306 p.what = "cgroup";
307 p.where = where;
308 p.type = "cgroup";
0c85a4f3 309 p.options = options;
2076ca54 310 p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
2076ca54 311
4ef31082 312 r = mount_one(&p, true);
2076ca54
LP
313 free(controller);
314 free(where);
315
0c85a4f3
LP
316 if (r < 0) {
317 free(options);
2076ca54 318 goto finish;
0c85a4f3
LP
319 }
320
321 if (r > 0 && k && *k) {
322 char **i;
323
324 for (i = *k; *i; i++) {
325 char *t;
326
327 t = strappend("/sys/fs/cgroup/", *i);
328 if (!t) {
14212119 329 r = log_oom();
0c85a4f3
LP
330 free(options);
331 goto finish;
332 }
333
334 r = symlink(options, t);
335 free(t);
336
337 if (r < 0 && errno != EEXIST) {
338 log_error("Failed to create symlink: %m");
339 r = -errno;
340 free(options);
341 goto finish;
342 }
343 }
344 }
345
346 free(options);
2076ca54
LP
347 }
348
349 r = 0;
350
351finish:
0c85a4f3
LP
352 set_free_free(controllers);
353
2076ca54
LP
354 fclose(f);
355
356 return r;
357}
358
1829dc9d
LP
/* nftw() callback that applies label_fix() to every visited entry
 * except the root of the walk. Returns FTW_SKIP_SUBTREE for the
 * directory /run/initramfs and FTW_CONTINUE otherwise. */
static int nftw_cb(
                const char *fpath,
                const struct stat *sb,
                int tflag,
                struct FTW *ftwbuf) {

        /* No need to label /dev twice in a row... */
        if (_unlikely_(ftwbuf->level == 0))
                return FTW_CONTINUE;

        label_fix(fpath, false, false);

        /* /run/initramfs is static data and big, no need to
         * dynamically relabel its contents at boot... */
        if (_unlikely_(ftwbuf->level == 1 &&
                       tflag == FTW_D &&
                       streq(fpath, "/run/initramfs")))
                return FTW_SKIP_SUBTREE;

        return FTW_CONTINUE;
}
380
0b3325e7 381int mount_setup(bool loaded_policy) {
5c0532d1 382
af65c248
LP
383 static const char relabel[] =
384 "/run/initramfs/root-fsck\0"
385 "/run/initramfs/shutdown\0";
386
8e274523 387 int r;
dad08730 388 unsigned i;
5ba2dc25 389 const char *j;
8e274523 390
4ef31082
LP
391 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
392 r = mount_one(mount_table + i, true);
393
394 if (r < 0)
8e274523 395 return r;
4ef31082 396 }
8e274523 397
f1d19aa4
LP
398 /* Nodes in devtmpfs and /run need to be manually updated for
399 * the appropriate labels, after mounting. The other virtual
400 * API file systems like /sys and /proc do not need that, they
401 * use the same label for all their files. */
0b3325e7
LP
402 if (loaded_policy) {
403 usec_t before_relabel, after_relabel;
404 char timespan[FORMAT_TIMESPAN_MAX];
405
406 before_relabel = now(CLOCK_MONOTONIC);
407
edb49778
LP
408 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
409 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
0b3325e7 410
af65c248
LP
411 /* Explicitly relabel these */
412 NULSTR_FOREACH(j, relabel)
c9bc0764 413 label_fix(j, true, false);
af65c248 414
0b3325e7
LP
415 after_relabel = now(CLOCK_MONOTONIC);
416
417 log_info("Relabelled /dev and /run in %s.",
418 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel));
3bbecb2f 419 }
1829dc9d 420
5c0532d1 421 /* Create a few default symlinks, which are normally created
f1d19aa4 422 * by udevd, but some scripts might need them before we start
5c0532d1 423 * udevd. */
01ed0e23 424 dev_setup(NULL);
5c0532d1 425
b3ac5f8c
LP
426 /* Mark the root directory as shared in regards to mount
427 * propagation. The kernel defaults to "private", but we think
428 * it makes more sense to have a default of "shared" so that
429 * nspawn and the container tools work out of the box. If
430 * specific setups need other settings they can reset the
431 * propagation mode to private if needed. */
c481f78b
LP
432 if (detect_container(NULL) <= 0)
433 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
434 log_warning("Failed to set up the root directory for shared mount propagation: %m");
b3ac5f8c 435
b925e726 436 /* Create a few directories we always want around */
d2e54fae
KS
437 mkdir_label("/run/systemd", 0755);
438 mkdir_label("/run/systemd/system", 0755);
b925e726 439
0c85a4f3 440 return 0;
8e274523 441}