]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/mount-setup.c
update TODO
[thirdparty/systemd.git] / src / core / mount-setup.c
CommitLineData
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22#include <sys/mount.h>
23#include <errno.h>
24#include <sys/stat.h>
25#include <stdlib.h>
26#include <string.h>
27#include <libgen.h>
28#include <assert.h>
5c0532d1 29#include <unistd.h>
1829dc9d 30#include <ftw.h>
8e274523
LP
31
32#include "mount-setup.h"
5ba2dc25 33#include "dev-setup.h"
8e274523 34#include "log.h"
c9af1080
LP
35#include "macro.h"
36#include "util.h"
5275d3c1 37#include "label.h"
0c85a4f3
LP
38#include "set.h"
39#include "strv.h"
49e942b2 40#include "mkdir.h"
9eb977db 41#include "path-util.h"
48ac500b 42#include "missing.h"
c481f78b 43#include "virt.h"
34e5a31e 44#include "efivars.h"
8552b176 45#include "smack-util.h"
7f112f50 46#include "def.h"
bef2733f 47
/* Per-entry behaviour flags stored in MountPoint.mode (bit mask). */
typedef enum MountMode {
        MNT_NONE         = 0,       /* no special handling */
        MNT_FATAL        = 1 << 0,  /* a failed mount() is logged at LOG_ERR and returned as an error */
        MNT_IN_CONTAINER = 1 << 1,  /* entry is also mounted when a container is detected */
} MountMode;
53
ca714c0e
LP
54typedef struct MountPoint {
55 const char *what;
56 const char *where;
57 const char *type;
58 const char *options;
59 unsigned long flags;
6aa220e0
KS
60 bool (*condition_fn)(void);
61 MountMode mode;
ca714c0e
LP
62} MountPoint;
63
/* Number of leading mount_table[] entries that mount_setup_early()
 * mounts. The first three entries we might need before SELinux is
 * up. The fourth (securityfs) is needed by IMA to load a custom
 * policy. The fifth is smackfs when built with HAVE_SMACK, and the
 * plain /dev/shm tmpfs otherwise. The other ones we can delay until
 * SELinux and IMA are loaded. */
#define N_EARLY_MOUNT 5
4ef31082 68
ca714c0e 69static const MountPoint mount_table[] = {
6aa220e0
KS
70 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
71 NULL, MNT_FATAL|MNT_IN_CONTAINER },
d1d8e5d4
RLM
72 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
73 NULL, MNT_FATAL|MNT_IN_CONTAINER },
6aa220e0
KS
74 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
75 NULL, MNT_FATAL|MNT_IN_CONTAINER },
76 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
77 NULL, MNT_NONE },
d407c940 78#ifdef HAVE_SMACK
8552b176
AK
79 { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
80 use_smack, MNT_FATAL },
d407c940 81 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
8552b176 82 use_smack, MNT_FATAL },
d407c940 83#endif
6aa220e0
KS
84 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
85 NULL, MNT_FATAL|MNT_IN_CONTAINER },
86 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
87 NULL, MNT_IN_CONTAINER },
d407c940
AK
88#ifdef HAVE_SMACK
89 { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
8552b176 90 use_smack, MNT_FATAL },
d407c940 91#endif
6aa220e0
KS
92 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
93 NULL, MNT_FATAL|MNT_IN_CONTAINER },
94 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
99a17ada 95 NULL, MNT_FATAL|MNT_IN_CONTAINER },
0ee67958 96 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
908f8b9c 97 NULL, MNT_IN_CONTAINER },
0ee67958 98 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
99a17ada 99 NULL, MNT_FATAL|MNT_IN_CONTAINER },
c06bf414
KS
100 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
101 NULL, MNT_NONE },
102#ifdef ENABLE_EFI
103 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
104 is_efi_boot, MNT_NONE },
105#endif
8e274523
LP
106};
107
/* These are API file systems that might be mounted by other software,
 * we just list them here so that we know that we should ignore them.
 *
 * The list is a sequence of NUL-terminated strings; the array's own
 * implicit trailing NUL leaves an empty string as end marker. */

static const char ignore_paths[] =
        /* SELinux file systems */
        "/sys/fs/selinux\0"
        "/selinux\0"
        /* Legacy cgroup mount points */
        "/dev/cgroup\0"
        "/cgroup\0"
        /* Legacy kernel file system */
        "/proc/bus/usb\0"
        /* Container bind mounts */
        "/proc/sys\0"
        "/dev/console\0"
        "/proc/kmsg\0";
949c6510 124
dad08730
LP
125bool mount_point_is_api(const char *path) {
126 unsigned i;
127
128 /* Checks if this mount point is considered "API", and hence
129 * should be ignored */
130
ca714c0e 131 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
449ddb2d 132 if (path_equal(path, mount_table[i].where))
dad08730
LP
133 return true;
134
57f2a956
KS
135 return path_startswith(path, "/sys/fs/cgroup/");
136}
137
138bool mount_point_ignore(const char *path) {
eaeb18db 139 const char *i;
57f2a956 140
eaeb18db
LP
141 NULSTR_FOREACH(i, ignore_paths)
142 if (path_equal(path, i))
949c6510
LP
143 return true;
144
57f2a956 145 return false;
dad08730
LP
146}
147
4ef31082 148static int mount_one(const MountPoint *p, bool relabel) {
8e274523
LP
149 int r;
150
ca714c0e 151 assert(p);
8e274523 152
6aa220e0
KS
153 if (p->condition_fn && !p->condition_fn())
154 return 0;
155
51b4af2c 156 /* Relabel first, just in case */
4ef31082 157 if (relabel)
c9bc0764 158 label_fix(p->where, true, true);
51b4af2c 159
c481f78b
LP
160 r = path_is_mount_point(p->where, true);
161 if (r < 0)
8e274523
LP
162 return r;
163
164 if (r > 0)
51b4af2c 165 return 0;
8e274523 166
c481f78b 167 /* Skip securityfs in a container */
6aa220e0 168 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
c481f78b
LP
169 return 0;
170
a04f58d6
LP
171 /* The access mode here doesn't really matter too much, since
172 * the mounted file system will take precedence anyway. */
c4bfd169
LP
173 if (relabel)
174 mkdir_p_label(p->where, 0755);
175 else
176 mkdir_p(p->where, 0755);
a04f58d6 177
8e274523 178 log_debug("Mounting %s to %s of type %s with options %s.",
ca714c0e
LP
179 p->what,
180 p->where,
181 p->type,
182 strna(p->options));
183
184 if (mount(p->what,
185 p->where,
186 p->type,
187 p->flags,
188 p->options) < 0) {
99a17ada 189 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s at %s: %m", p->type, p->where);
6aa220e0 190 return (p->mode & MNT_FATAL) ? -errno : 0;
8e274523
LP
191 }
192
51b4af2c 193 /* Relabel again, since we now mounted something fresh here */
4ef31082 194 if (relabel)
c9bc0764 195 label_fix(p->where, false, false);
5275d3c1 196
0c85a4f3 197 return 1;
8e274523
LP
198}
199
4ef31082
LP
200int mount_setup_early(void) {
201 unsigned i;
202 int r = 0;
203
204 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
205
206 /* Do a minimal mount of /proc and friends to enable the most
207 * basic stuff, such as SELinux */
208 for (i = 0; i < N_EARLY_MOUNT; i ++) {
209 int j;
210
211 j = mount_one(mount_table + i, false);
212 if (r == 0)
213 r = j;
214 }
215
216 return r;
217}
218
0c85a4f3 219int mount_cgroup_controllers(char ***join_controllers) {
a6b26d90
ZJS
220 _cleanup_set_free_free_ Set *controllers = NULL;
221 _cleanup_fclose_ FILE *f;
a641dcd9
LP
222 char buf[LINE_MAX];
223 int r;
2076ca54 224
670802d4 225 /* Mount all available cgroup controllers that are built into the kernel. */
2076ca54 226
0c85a4f3
LP
227 f = fopen("/proc/cgroups", "re");
228 if (!f) {
016e9849
LP
229 log_error("Failed to enumerate cgroup controllers: %m");
230 return 0;
231 }
2076ca54 232
0c85a4f3 233 controllers = set_new(string_hash_func, string_compare_func);
a6b26d90
ZJS
234 if (!controllers)
235 return log_oom();
0c85a4f3 236
2076ca54 237 /* Ignore the header line */
bab45044 238 (void) fgets(buf, sizeof(buf), f);
2076ca54
LP
239
240 for (;;) {
0c85a4f3
LP
241 char *controller;
242 int enabled = 0;
2076ca54 243
16f6682d 244 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2076ca54
LP
245
246 if (feof(f))
247 break;
248
249 log_error("Failed to parse /proc/cgroups.");
a6b26d90 250 return -EIO;
2076ca54
LP
251 }
252
600a328f
LP
253 if (!enabled) {
254 free(controller);
255 continue;
256 }
257
ef42202a 258 r = set_consume(controllers, controller);
0c85a4f3
LP
259 if (r < 0) {
260 log_error("Failed to add controller to set.");
a6b26d90 261 return r;
0c85a4f3
LP
262 }
263 }
264
265 for (;;) {
a641dcd9 266 _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
a6b26d90
ZJS
267 MountPoint p = {
268 .what = "cgroup",
269 .type = "cgroup",
270 .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
271 .mode = MNT_IN_CONTAINER,
272 };
0c85a4f3
LP
273 char ***k = NULL;
274
275 controller = set_steal_first(controllers);
276 if (!controller)
277 break;
278
279 if (join_controllers)
280 for (k = join_controllers; *k; k++)
281 if (strv_find(*k, controller))
282 break;
283
284 if (k && *k) {
285 char **i, **j;
286
287 for (i = *k, j = *k; *i; i++) {
288
289 if (!streq(*i, controller)) {
a641dcd9 290 _cleanup_free_ char *t;
0c85a4f3
LP
291
292 t = set_remove(controllers, *i);
293 if (!t) {
294 free(*i);
295 continue;
296 }
0c85a4f3
LP
297 }
298
299 *(j++) = *i;
300 }
301
302 *j = NULL;
303
304 options = strv_join(*k, ",");
a6b26d90
ZJS
305 if (!options)
306 return log_oom();
0c85a4f3
LP
307 } else {
308 options = controller;
309 controller = NULL;
310 }
311
a641dcd9
LP
312 where = strappend("/sys/fs/cgroup/", options);
313 if (!where)
314 return log_oom();
315
316 p.where = where;
0c85a4f3 317 p.options = options;
2076ca54 318
4ef31082 319 r = mount_one(&p, true);
a6b26d90
ZJS
320 if (r < 0)
321 return r;
0c85a4f3
LP
322
323 if (r > 0 && k && *k) {
324 char **i;
325
326 for (i = *k; *i; i++) {
a641dcd9
LP
327 _cleanup_free_ char *t = NULL;
328
329 t = strappend("/sys/fs/cgroup/", *i);
330 if (!t)
331 return log_oom();
0c85a4f3
LP
332
333 r = symlink(options, t);
0c85a4f3 334 if (r < 0 && errno != EEXIST) {
c79bb9e4 335 log_error("Failed to create symlink %s: %m", t);
a6b26d90 336 return -errno;
0c85a4f3
LP
337 }
338 }
339 }
2076ca54
LP
340 }
341
679142ce
LP
342 /* Now that we mounted everything, let's make the tmpfs the
343 * cgroup file systems are mounted into read-only. */
344 mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
345
a6b26d90 346 return 0;
2076ca54
LP
347}
348
1829dc9d
LP
349static int nftw_cb(
350 const char *fpath,
351 const struct stat *sb,
352 int tflag,
353 struct FTW *ftwbuf) {
354
9fe117ea 355 /* No need to label /dev twice in a row... */
edb49778
LP
356 if (_unlikely_(ftwbuf->level == 0))
357 return FTW_CONTINUE;
358
c9bc0764 359 label_fix(fpath, false, false);
af65c248 360
edb49778 361 /* /run/initramfs is static data and big, no need to
af65c248 362 * dynamically relabel its contents at boot... */
edb49778
LP
363 if (_unlikely_(ftwbuf->level == 1 &&
364 tflag == FTW_D &&
365 streq(fpath, "/run/initramfs")))
366 return FTW_SKIP_SUBTREE;
9fe117ea 367
edb49778 368 return FTW_CONTINUE;
1829dc9d
LP
369};
370
0b3325e7 371int mount_setup(bool loaded_policy) {
8e274523 372 int r;
dad08730 373 unsigned i;
8e274523 374
4ef31082
LP
375 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
376 r = mount_one(mount_table + i, true);
377
378 if (r < 0)
8e274523 379 return r;
4ef31082 380 }
8e274523 381
f1d19aa4
LP
382 /* Nodes in devtmpfs and /run need to be manually updated for
383 * the appropriate labels, after mounting. The other virtual
384 * API file systems like /sys and /proc do not need that, they
385 * use the same label for all their files. */
0b3325e7
LP
386 if (loaded_policy) {
387 usec_t before_relabel, after_relabel;
388 char timespan[FORMAT_TIMESPAN_MAX];
389
390 before_relabel = now(CLOCK_MONOTONIC);
391
edb49778
LP
392 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
393 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
0b3325e7
LP
394
395 after_relabel = now(CLOCK_MONOTONIC);
396
397 log_info("Relabelled /dev and /run in %s.",
2fa4092c 398 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
3bbecb2f 399 }
1829dc9d 400
5c0532d1 401 /* Create a few default symlinks, which are normally created
f1d19aa4 402 * by udevd, but some scripts might need them before we start
5c0532d1 403 * udevd. */
01ed0e23 404 dev_setup(NULL);
5c0532d1 405
b3ac5f8c
LP
406 /* Mark the root directory as shared in regards to mount
407 * propagation. The kernel defaults to "private", but we think
408 * it makes more sense to have a default of "shared" so that
409 * nspawn and the container tools work out of the box. If
410 * specific setups need other settings they can reset the
411 * propagation mode to private if needed. */
c481f78b
LP
412 if (detect_container(NULL) <= 0)
413 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
414 log_warning("Failed to set up the root directory for shared mount propagation: %m");
b3ac5f8c 415
66e41181
LP
416 /* Create a few directories we always want around, Note that
417 * sd_booted() checks for /run/systemd/system, so this mkdir
418 * really needs to stay for good, otherwise software that
419 * copied sd-daemon.c into their sources will misdetect
420 * systemd. */
d2e54fae
KS
421 mkdir_label("/run/systemd", 0755);
422 mkdir_label("/run/systemd/system", 0755);
c17ec25e 423 mkdir_label("/run/systemd/inaccessible", 0000);
b925e726 424
0c85a4f3 425 return 0;
8e274523 426}