]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/mount-setup.c
cgroup: it's not OK to invoke alloca() in loops
[thirdparty/systemd.git] / src / core / mount-setup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mount.h>
23#include <errno.h>
24#include <sys/stat.h>
25#include <stdlib.h>
26#include <string.h>
27#include <libgen.h>
28#include <assert.h>
5c0532d1 29#include <unistd.h>
1829dc9d 30#include <ftw.h>
8e274523
LP
31
32#include "mount-setup.h"
5ba2dc25 33#include "dev-setup.h"
8e274523 34#include "log.h"
c9af1080
LP
35#include "macro.h"
36#include "util.h"
5275d3c1 37#include "label.h"
0c85a4f3
LP
38#include "set.h"
39#include "strv.h"
49e942b2 40#include "mkdir.h"
9eb977db 41#include "path-util.h"
48ac500b 42#include "missing.h"
c481f78b 43#include "virt.h"
34e5a31e 44#include "efivars.h"
8552b176 45#include "smack-util.h"
7f112f50 46#include "def.h"
bef2733f 47
6aa220e0
KS
/* Per-entry behaviour flags stored in MountPoint.mode; the values are
 * bit flags and may be OR-ed together. */
typedef enum MountMode {
        MNT_NONE         = 0,
        MNT_FATAL        = 1 << 0, /* a failed mount() is logged at LOG_ERR and returned as an error */
        MNT_IN_CONTAINER = 1 << 1, /* entry is also mounted when a container is detected */
} MountMode;
53
ca714c0e
LP
54typedef struct MountPoint {
55 const char *what;
56 const char *where;
57 const char *type;
58 const char *options;
59 unsigned long flags;
6aa220e0
KS
60 bool (*condition_fn)(void);
61 MountMode mode;
ca714c0e
LP
62} MountPoint;
63
/* Number of leading mount_table[] entries that mount_setup_early()
 * mounts. The first three entries we might need before SELinux is up.
 * The fourth (securityfs) is needed by IMA to load a custom policy.
 * The fifth is smackfs when built with HAVE_SMACK (otherwise it is the
 * /dev/shm tmpfs). The other ones we can delay until SELinux and IMA
 * are loaded. */
#define N_EARLY_MOUNT 5
4ef31082 68
ca714c0e 69static const MountPoint mount_table[] = {
6aa220e0
KS
70 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
71 NULL, MNT_FATAL|MNT_IN_CONTAINER },
72 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
73 NULL, MNT_FATAL|MNT_IN_CONTAINER },
74 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
75 NULL, MNT_FATAL|MNT_IN_CONTAINER },
76 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
77 NULL, MNT_NONE },
d407c940 78#ifdef HAVE_SMACK
8552b176
AK
79 { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
80 use_smack, MNT_FATAL },
d407c940 81 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
8552b176 82 use_smack, MNT_FATAL },
d407c940 83#endif
6aa220e0
KS
84 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
85 NULL, MNT_FATAL|MNT_IN_CONTAINER },
86 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
87 NULL, MNT_IN_CONTAINER },
d407c940
AK
88#ifdef HAVE_SMACK
89 { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
8552b176 90 use_smack, MNT_FATAL },
d407c940 91#endif
6aa220e0
KS
92 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
93 NULL, MNT_FATAL|MNT_IN_CONTAINER },
94 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
95 NULL, MNT_IN_CONTAINER },
0ee67958
ZJS
96#ifdef HAVE_XATTR
97 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
98 NULL, MNT_IN_CONTAINER },
99#endif
100 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
6aa220e0 101 NULL, MNT_IN_CONTAINER },
c06bf414
KS
102 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
103 NULL, MNT_NONE },
104#ifdef ENABLE_EFI
105 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
106 is_efi_boot, MNT_NONE },
107#endif
8e274523
LP
108};
109
/* These are API file systems that might be mounted by other software,
 * we just list them here so that we know that we should ignore them.
 *
 * The list is a sequence of NUL-terminated strings; the implicit
 * terminator of the final literal yields the empty string that ends
 * the list. */
static const char ignore_paths[] =
        /* SELinux file systems */
        "/sys/fs/selinux\0"
        "/selinux\0"
        /* Legacy cgroup mount points */
        "/dev/cgroup\0"
        "/cgroup\0"
        /* Legacy kernel file system */
        "/proc/bus/usb\0"
        /* Container bind mounts */
        "/proc/sys\0"
        "/dev/console\0"
        "/proc/kmsg\0";
949c6510 126
dad08730
LP
127bool mount_point_is_api(const char *path) {
128 unsigned i;
129
130 /* Checks if this mount point is considered "API", and hence
131 * should be ignored */
132
ca714c0e 133 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
449ddb2d 134 if (path_equal(path, mount_table[i].where))
dad08730
LP
135 return true;
136
57f2a956
KS
137 return path_startswith(path, "/sys/fs/cgroup/");
138}
139
140bool mount_point_ignore(const char *path) {
eaeb18db 141 const char *i;
57f2a956 142
eaeb18db
LP
143 NULSTR_FOREACH(i, ignore_paths)
144 if (path_equal(path, i))
949c6510
LP
145 return true;
146
57f2a956 147 return false;
dad08730
LP
148}
149
4ef31082 150static int mount_one(const MountPoint *p, bool relabel) {
8e274523
LP
151 int r;
152
ca714c0e 153 assert(p);
8e274523 154
6aa220e0
KS
155 if (p->condition_fn && !p->condition_fn())
156 return 0;
157
51b4af2c 158 /* Relabel first, just in case */
4ef31082 159 if (relabel)
c9bc0764 160 label_fix(p->where, true, true);
51b4af2c 161
c481f78b
LP
162 r = path_is_mount_point(p->where, true);
163 if (r < 0)
8e274523
LP
164 return r;
165
166 if (r > 0)
51b4af2c 167 return 0;
8e274523 168
c481f78b 169 /* Skip securityfs in a container */
6aa220e0 170 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
c481f78b
LP
171 return 0;
172
a04f58d6
LP
173 /* The access mode here doesn't really matter too much, since
174 * the mounted file system will take precedence anyway. */
d2e54fae 175 mkdir_p_label(p->where, 0755);
a04f58d6 176
8e274523 177 log_debug("Mounting %s to %s of type %s with options %s.",
ca714c0e
LP
178 p->what,
179 p->where,
180 p->type,
181 strna(p->options));
182
183 if (mount(p->what,
184 p->where,
185 p->type,
186 p->flags,
187 p->options) < 0) {
f5f6d0e2 188 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %m", p->where);
6aa220e0 189 return (p->mode & MNT_FATAL) ? -errno : 0;
8e274523
LP
190 }
191
51b4af2c 192 /* Relabel again, since we now mounted something fresh here */
4ef31082 193 if (relabel)
c9bc0764 194 label_fix(p->where, false, false);
5275d3c1 195
0c85a4f3 196 return 1;
8e274523
LP
197}
198
4ef31082
LP
199int mount_setup_early(void) {
200 unsigned i;
201 int r = 0;
202
203 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
204
205 /* Do a minimal mount of /proc and friends to enable the most
206 * basic stuff, such as SELinux */
207 for (i = 0; i < N_EARLY_MOUNT; i ++) {
208 int j;
209
210 j = mount_one(mount_table + i, false);
211 if (r == 0)
212 r = j;
213 }
214
215 return r;
216}
217
0c85a4f3 218int mount_cgroup_controllers(char ***join_controllers) {
a6b26d90
ZJS
219 _cleanup_set_free_free_ Set *controllers = NULL;
220 _cleanup_fclose_ FILE *f;
a641dcd9
LP
221 char buf[LINE_MAX];
222 int r;
2076ca54 223
670802d4 224 /* Mount all available cgroup controllers that are built into the kernel. */
2076ca54 225
0c85a4f3
LP
226 f = fopen("/proc/cgroups", "re");
227 if (!f) {
016e9849
LP
228 log_error("Failed to enumerate cgroup controllers: %m");
229 return 0;
230 }
2076ca54 231
0c85a4f3 232 controllers = set_new(string_hash_func, string_compare_func);
a6b26d90
ZJS
233 if (!controllers)
234 return log_oom();
0c85a4f3 235
2076ca54 236 /* Ignore the header line */
bab45044 237 (void) fgets(buf, sizeof(buf), f);
2076ca54
LP
238
239 for (;;) {
0c85a4f3
LP
240 char *controller;
241 int enabled = 0;
2076ca54 242
16f6682d 243 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2076ca54
LP
244
245 if (feof(f))
246 break;
247
248 log_error("Failed to parse /proc/cgroups.");
a6b26d90 249 return -EIO;
2076ca54
LP
250 }
251
600a328f
LP
252 if (!enabled) {
253 free(controller);
254 continue;
255 }
256
ef42202a 257 r = set_consume(controllers, controller);
0c85a4f3
LP
258 if (r < 0) {
259 log_error("Failed to add controller to set.");
a6b26d90 260 return r;
0c85a4f3
LP
261 }
262 }
263
264 for (;;) {
a641dcd9 265 _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
a6b26d90
ZJS
266 MountPoint p = {
267 .what = "cgroup",
268 .type = "cgroup",
269 .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
270 .mode = MNT_IN_CONTAINER,
271 };
0c85a4f3
LP
272 char ***k = NULL;
273
274 controller = set_steal_first(controllers);
275 if (!controller)
276 break;
277
278 if (join_controllers)
279 for (k = join_controllers; *k; k++)
280 if (strv_find(*k, controller))
281 break;
282
283 if (k && *k) {
284 char **i, **j;
285
286 for (i = *k, j = *k; *i; i++) {
287
288 if (!streq(*i, controller)) {
a641dcd9 289 _cleanup_free_ char *t;
0c85a4f3
LP
290
291 t = set_remove(controllers, *i);
292 if (!t) {
293 free(*i);
294 continue;
295 }
0c85a4f3
LP
296 }
297
298 *(j++) = *i;
299 }
300
301 *j = NULL;
302
303 options = strv_join(*k, ",");
a6b26d90
ZJS
304 if (!options)
305 return log_oom();
0c85a4f3
LP
306 } else {
307 options = controller;
308 controller = NULL;
309 }
310
a641dcd9
LP
311 where = strappend("/sys/fs/cgroup/", options);
312 if (!where)
313 return log_oom();
314
315 p.where = where;
0c85a4f3 316 p.options = options;
2076ca54 317
4ef31082 318 r = mount_one(&p, true);
a6b26d90
ZJS
319 if (r < 0)
320 return r;
0c85a4f3
LP
321
322 if (r > 0 && k && *k) {
323 char **i;
324
325 for (i = *k; *i; i++) {
a641dcd9
LP
326 _cleanup_free_ char *t = NULL;
327
328 t = strappend("/sys/fs/cgroup/", *i);
329 if (!t)
330 return log_oom();
0c85a4f3
LP
331
332 r = symlink(options, t);
0c85a4f3 333 if (r < 0 && errno != EEXIST) {
c79bb9e4 334 log_error("Failed to create symlink %s: %m", t);
a6b26d90 335 return -errno;
0c85a4f3
LP
336 }
337 }
338 }
2076ca54
LP
339 }
340
a6b26d90 341 return 0;
2076ca54
LP
342}
343
1829dc9d
LP
344static int nftw_cb(
345 const char *fpath,
346 const struct stat *sb,
347 int tflag,
348 struct FTW *ftwbuf) {
349
9fe117ea 350 /* No need to label /dev twice in a row... */
edb49778
LP
351 if (_unlikely_(ftwbuf->level == 0))
352 return FTW_CONTINUE;
353
c9bc0764 354 label_fix(fpath, false, false);
af65c248 355
edb49778 356 /* /run/initramfs is static data and big, no need to
af65c248 357 * dynamically relabel its contents at boot... */
edb49778
LP
358 if (_unlikely_(ftwbuf->level == 1 &&
359 tflag == FTW_D &&
360 streq(fpath, "/run/initramfs")))
361 return FTW_SKIP_SUBTREE;
9fe117ea 362
edb49778 363 return FTW_CONTINUE;
1829dc9d
LP
364};
365
0b3325e7 366int mount_setup(bool loaded_policy) {
8e274523 367 int r;
dad08730 368 unsigned i;
8e274523 369
4ef31082
LP
370 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
371 r = mount_one(mount_table + i, true);
372
373 if (r < 0)
8e274523 374 return r;
4ef31082 375 }
8e274523 376
f1d19aa4
LP
377 /* Nodes in devtmpfs and /run need to be manually updated for
378 * the appropriate labels, after mounting. The other virtual
379 * API file systems like /sys and /proc do not need that, they
380 * use the same label for all their files. */
0b3325e7
LP
381 if (loaded_policy) {
382 usec_t before_relabel, after_relabel;
383 char timespan[FORMAT_TIMESPAN_MAX];
384
385 before_relabel = now(CLOCK_MONOTONIC);
386
edb49778
LP
387 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
388 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
0b3325e7
LP
389
390 after_relabel = now(CLOCK_MONOTONIC);
391
392 log_info("Relabelled /dev and /run in %s.",
2fa4092c 393 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
3bbecb2f 394 }
1829dc9d 395
5c0532d1 396 /* Create a few default symlinks, which are normally created
f1d19aa4 397 * by udevd, but some scripts might need them before we start
5c0532d1 398 * udevd. */
01ed0e23 399 dev_setup(NULL);
5c0532d1 400
b3ac5f8c
LP
401 /* Mark the root directory as shared in regards to mount
402 * propagation. The kernel defaults to "private", but we think
403 * it makes more sense to have a default of "shared" so that
404 * nspawn and the container tools work out of the box. If
405 * specific setups need other settings they can reset the
406 * propagation mode to private if needed. */
c481f78b
LP
407 if (detect_container(NULL) <= 0)
408 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
409 log_warning("Failed to set up the root directory for shared mount propagation: %m");
b3ac5f8c 410
66e41181
LP
411 /* Create a few directories we always want around, Note that
412 * sd_booted() checks for /run/systemd/system, so this mkdir
413 * really needs to stay for good, otherwise software that
414 * copied sd-daemon.c into their sources will misdetect
415 * systemd. */
d2e54fae
KS
416 mkdir_label("/run/systemd", 0755);
417 mkdir_label("/run/systemd/system", 0755);
c17ec25e 418 mkdir_label("/run/systemd/inaccessible", 0000);
b925e726 419
0c85a4f3 420 return 0;
8e274523 421}