]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/mount-setup.c
macro: introduce TAKE_PTR() macro
[thirdparty/systemd.git] / src / core / mount-setup.c
CommitLineData
/* SPDX-License-Identifier: LGPL-2.1+ */
/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
20
8e274523 21#include <errno.h>
cf0fbc49 22#include <ftw.h>
8e274523 23#include <stdlib.h>
cf0fbc49 24#include <sys/mount.h>
5c0532d1 25#include <unistd.h>
8e274523 26
b5efdb8a 27#include "alloc-util.h"
64824462 28#include "bus-util.h"
4349cd7c
LP
29#include "cgroup-util.h"
30#include "dev-setup.h"
31#include "efivars.h"
e07aefbd 32#include "fileio.h"
c4b41707 33#include "fs-util.h"
4349cd7c 34#include "label.h"
8e274523 35#include "log.h"
c9af1080 36#include "macro.h"
4349cd7c 37#include "missing.h"
49e942b2 38#include "mkdir.h"
4349cd7c
LP
39#include "mount-setup.h"
40#include "mount-util.h"
9eb977db 41#include "path-util.h"
4349cd7c 42#include "set.h"
8552b176 43#include "smack-util.h"
4349cd7c 44#include "strv.h"
ee104e11 45#include "user-util.h"
4349cd7c
LP
46#include "util.h"
47#include "virt.h"
bef2733f 48
/* Per-entry behaviour flags evaluated by mount_one(). */
typedef enum MountMode {
        /* No special treatment: failures are logged at debug level and ignored. */
        MNT_NONE           = 0,

        /* Failures are logged as errors and propagated to the caller. */
        MNT_FATAL          = 1 << 0,

        /* The entry is also mounted when we run inside a container. */
        MNT_IN_CONTAINER   = 1 << 1,

        /* After mounting, the mount point must be writable; otherwise it is
         * unmounted and removed again. */
        MNT_CHECK_WRITABLE = 1 << 2,
} MountMode;
55
ca714c0e
LP
56typedef struct MountPoint {
57 const char *what;
58 const char *where;
59 const char *type;
60 const char *options;
61 unsigned long flags;
6aa220e0
KS
62 bool (*condition_fn)(void);
63 MountMode mode;
ca714c0e
LP
64} MountPoint;
65
/* Number of leading mount_table[] entries that mount_setup_early() mounts.
 *
 * The first three entries might be needed before SELinux is up, and the
 * fourth one (securityfs) is needed by IMA to load a custom policy. All
 * remaining entries can be delayed until SELinux and IMA are loaded. When
 * SMACK is enabled smackfs is needed early as well, which makes it a fifth
 * entry. */
#if ENABLE_SMACK
#  define N_EARLY_MOUNT 5
#else
#  define N_EARLY_MOUNT 4
#endif
4ef31082 75
ca714c0e 76static const MountPoint mount_table[] = {
68d4c452
LP
77 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
78 NULL, MNT_FATAL|MNT_IN_CONTAINER },
79 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
80 NULL, MNT_FATAL|MNT_IN_CONTAINER },
81 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
82 NULL, MNT_FATAL|MNT_IN_CONTAINER },
83 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
84 NULL, MNT_NONE },
f9fa32f0 85#if ENABLE_SMACK
68d4c452
LP
86 { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV,
87 mac_smack_use, MNT_FATAL },
88 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
89 mac_smack_use, MNT_FATAL },
d407c940 90#endif
68d4c452
LP
91 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
92 NULL, MNT_FATAL|MNT_IN_CONTAINER },
93 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
94 NULL, MNT_IN_CONTAINER },
f9fa32f0 95#if ENABLE_SMACK
68d4c452
LP
96 { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
97 mac_smack_use, MNT_FATAL },
d407c940 98#endif
68d4c452
LP
99 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
100 NULL, MNT_FATAL|MNT_IN_CONTAINER },
65900808 101 { "cgroup2", "/sys/fs/cgroup", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV,
2d56b80a 102 cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
65900808 103 { "cgroup2", "/sys/fs/cgroup", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
2d56b80a 104 cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
68d4c452 105 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
efdb0237 106 cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
65900808 107 { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV,
e07aefbd 108 cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
65900808 109 { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
e07aefbd 110 cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
68d4c452 111 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
2977724b 112 cg_is_legacy_wanted, MNT_IN_CONTAINER },
68d4c452 113 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
2977724b 114 cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
68d4c452
LP
115 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
116 NULL, MNT_NONE },
349cc4a5 117#if ENABLE_EFI
68d4c452
LP
118 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
119 is_efi_boot, MNT_NONE },
c06bf414 120#endif
39f305a9 121 { "bpf", "/sys/fs/bpf", "bpf", "mode=700", MS_NOSUID|MS_NOEXEC|MS_NODEV,
43b7f24b 122 NULL, MNT_NONE, },
63cc4c31
DM
123};
124
/* API file systems that might get mounted by other software. We only list
 * them here so that we know that we should ignore them. The list is a
 * sequence of NUL-terminated strings, iterated with NULSTR_FOREACH(). */
static const char ignore_paths[] =
        /* SELinux file systems */
        "/sys/fs/selinux\0"

        /* Container bind mounts */
        "/proc/sys\0"
        "/dev/console\0"
        "/proc/kmsg\0";
949c6510 135
dad08730
LP
136bool mount_point_is_api(const char *path) {
137 unsigned i;
138
139 /* Checks if this mount point is considered "API", and hence
140 * should be ignored */
141
ca714c0e 142 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
449ddb2d 143 if (path_equal(path, mount_table[i].where))
dad08730
LP
144 return true;
145
57f2a956
KS
146 return path_startswith(path, "/sys/fs/cgroup/");
147}
148
149bool mount_point_ignore(const char *path) {
eaeb18db 150 const char *i;
57f2a956 151
eaeb18db
LP
152 NULSTR_FOREACH(i, ignore_paths)
153 if (path_equal(path, i))
949c6510
LP
154 return true;
155
57f2a956 156 return false;
dad08730
LP
157}
158
4ef31082 159static int mount_one(const MountPoint *p, bool relabel) {
713a8875 160 int r, priority;
8e274523 161
ca714c0e 162 assert(p);
8e274523 163
713a8875
LP
164 priority = (p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG;
165
6aa220e0
KS
166 if (p->condition_fn && !p->condition_fn())
167 return 0;
168
51b4af2c 169 /* Relabel first, just in case */
4ef31082 170 if (relabel)
1411b094 171 (void) label_fix(p->where, true, true);
51b4af2c 172
e1873695 173 r = path_is_mount_point(p->where, NULL, AT_SYMLINK_FOLLOW);
1411b094 174 if (r < 0 && r != -ENOENT) {
713a8875 175 log_full_errno(priority, r, "Failed to determine whether %s is a mount point: %m", p->where);
1411b094
LP
176 return (p->mode & MNT_FATAL) ? r : 0;
177 }
8e274523 178 if (r > 0)
51b4af2c 179 return 0;
8e274523 180
c481f78b 181 /* Skip securityfs in a container */
75f86906 182 if (!(p->mode & MNT_IN_CONTAINER) && detect_container() > 0)
c481f78b
LP
183 return 0;
184
a04f58d6
LP
185 /* The access mode here doesn't really matter too much, since
186 * the mounted file system will take precedence anyway. */
c4bfd169 187 if (relabel)
1411b094 188 (void) mkdir_p_label(p->where, 0755);
c4bfd169 189 else
1411b094 190 (void) mkdir_p(p->where, 0755);
a04f58d6 191
8e274523 192 log_debug("Mounting %s to %s of type %s with options %s.",
ca714c0e
LP
193 p->what,
194 p->where,
195 p->type,
196 strna(p->options));
197
198 if (mount(p->what,
199 p->where,
200 p->type,
201 p->flags,
202 p->options) < 0) {
713a8875 203 log_full_errno(priority, errno, "Failed to mount %s at %s: %m", p->type, p->where);
6aa220e0 204 return (p->mode & MNT_FATAL) ? -errno : 0;
8e274523
LP
205 }
206
51b4af2c 207 /* Relabel again, since we now mounted something fresh here */
4ef31082 208 if (relabel)
1411b094 209 (void) label_fix(p->where, false, false);
5275d3c1 210
e07aefbd 211 if (p->mode & MNT_CHECK_WRITABLE) {
713a8875
LP
212 if (access(p->where, W_OK) < 0) {
213 r = -errno;
214
e07aefbd 215 (void) umount(p->where);
1ff654e2 216 (void) rmdir(p->where);
713a8875
LP
217
218 log_full_errno(priority, r, "Mount point %s not writable after mounting: %m", p->where);
e07aefbd
CB
219 return (p->mode & MNT_FATAL) ? r : 0;
220 }
221 }
222
0c85a4f3 223 return 1;
8e274523
LP
224}
225
400fac06 226static int mount_points_setup(unsigned n, bool loaded_policy) {
4ef31082
LP
227 unsigned i;
228 int r = 0;
229
400fac06 230 for (i = 0; i < n; i ++) {
4ef31082
LP
231 int j;
232
400fac06 233 j = mount_one(mount_table + i, loaded_policy);
7ff307bc 234 if (j != 0 && r >= 0)
4ef31082
LP
235 r = j;
236 }
237
238 return r;
239}
240
400fac06
AK
241int mount_setup_early(void) {
242 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
243
244 /* Do a minimal mount of /proc and friends to enable the most
245 * basic stuff, such as SELinux */
246 return mount_points_setup(N_EARLY_MOUNT, false);
247}
248
0c85a4f3 249int mount_cgroup_controllers(char ***join_controllers) {
a6b26d90 250 _cleanup_set_free_free_ Set *controllers = NULL;
5cbaad2f 251 bool has_argument = !!join_controllers;
a641dcd9 252 int r;
2076ca54 253
efdb0237
LP
254 if (!cg_is_legacy_wanted())
255 return 0;
256
670802d4 257 /* Mount all available cgroup controllers that are built into the kernel. */
2076ca54 258
5cbaad2f 259 if (!has_argument)
56c8d744
ZJS
260 /* The defaults:
261 * mount "cpu" + "cpuacct" together, and "net_cls" + "net_prio".
262 *
263 * We'd like to add "cpuset" to the mix, but "cpuset" doesn't really
264 * work for groups with no initialized attributes.
265 */
266 join_controllers = (char**[]) {
267 STRV_MAKE("cpu", "cpuacct"),
268 STRV_MAKE("net_cls", "net_prio"),
269 NULL,
270 };
271
6925a0de 272 r = cg_kernel_controllers(&controllers);
b12afc8c
LP
273 if (r < 0)
274 return log_error_errno(r, "Failed to enumerate cgroup controllers: %m");
0c85a4f3
LP
275
276 for (;;) {
a641dcd9 277 _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
a6b26d90
ZJS
278 MountPoint p = {
279 .what = "cgroup",
280 .type = "cgroup",
281 .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
282 .mode = MNT_IN_CONTAINER,
283 };
0c85a4f3
LP
284 char ***k = NULL;
285
286 controller = set_steal_first(controllers);
287 if (!controller)
288 break;
289
56c8d744
ZJS
290 for (k = join_controllers; *k; k++)
291 if (strv_find(*k, controller))
292 break;
0c85a4f3
LP
293
294 if (k && *k) {
295 char **i, **j;
296
297 for (i = *k, j = *k; *i; i++) {
298
299 if (!streq(*i, controller)) {
a641dcd9 300 _cleanup_free_ char *t;
0c85a4f3
LP
301
302 t = set_remove(controllers, *i);
303 if (!t) {
5cbaad2f
YW
304 if (has_argument)
305 free(*i);
0c85a4f3
LP
306 continue;
307 }
0c85a4f3
LP
308 }
309
310 *(j++) = *i;
311 }
312
313 *j = NULL;
314
315 options = strv_join(*k, ",");
a6b26d90
ZJS
316 if (!options)
317 return log_oom();
ae2a15bc
LP
318 } else
319 options = TAKE_PTR(controller);
0c85a4f3 320
a641dcd9
LP
321 where = strappend("/sys/fs/cgroup/", options);
322 if (!where)
323 return log_oom();
324
325 p.where = where;
0c85a4f3 326 p.options = options;
2076ca54 327
4ef31082 328 r = mount_one(&p, true);
a6b26d90
ZJS
329 if (r < 0)
330 return r;
0c85a4f3
LP
331
332 if (r > 0 && k && *k) {
333 char **i;
334
335 for (i = *k; *i; i++) {
a641dcd9
LP
336 _cleanup_free_ char *t = NULL;
337
338 t = strappend("/sys/fs/cgroup/", *i);
339 if (!t)
340 return log_oom();
0c85a4f3
LP
341
342 r = symlink(options, t);
ea2b93a8 343 if (r >= 0) {
f8c1a81c 344#ifdef SMACK_RUN_LABEL
ea2b93a8
PO
345 _cleanup_free_ char *src;
346 src = strappend("/sys/fs/cgroup/", options);
347 if (!src)
348 return log_oom();
349 r = mac_smack_copy(t, src);
350 if (r < 0 && r != -EOPNOTSUPP)
351 return log_error_errno(r, "Failed to copy smack label from %s to %s: %m", src, t);
f8c1a81c 352#endif
ea2b93a8
PO
353 } else if (errno != EEXIST)
354 return log_error_errno(errno, "Failed to create symlink %s: %m", t);
0c85a4f3
LP
355 }
356 }
2076ca54
LP
357 }
358
679142ce
LP
359 /* Now that we mounted everything, let's make the tmpfs the
360 * cgroup file systems are mounted into read-only. */
b12afc8c 361 (void) mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
679142ce 362
a6b26d90 363 return 0;
2076ca54
LP
364}
365
#if HAVE_SELINUX || ENABLE_SMACK
/* nftw() callback used by mount_setup() to relabel a tree: fixes the label
 * of every visited entry except the root of the walk, and does not descend
 * into /run/initramfs. Always lets the walk go on. */
static int nftw_cb(
                const char *fpath,
                const struct stat *sb,
                int tflag,
                struct FTW *ftwbuf) {

        /* No need to label /dev twice in a row... */
        if (_unlikely_(ftwbuf->level == 0))
                return FTW_CONTINUE;

        /* Best effort, a failure to label one entry must not stop the walk */
        (void) label_fix(fpath, false, false);

        /* /run/initramfs is static data and big, no need to
         * dynamically relabel its contents at boot... */
        if (_unlikely_(ftwbuf->level == 1 &&
                       tflag == FTW_D &&
                       streq(fpath, "/run/initramfs")))
                return FTW_SKIP_SUBTREE;

        return FTW_CONTINUE;
}
#endif
1829dc9d 389
0b3325e7 390int mount_setup(bool loaded_policy) {
68d4c452 391 int r = 0;
8e274523 392
400fac06 393 r = mount_points_setup(ELEMENTSOF(mount_table), loaded_policy);
68d4c452
LP
394 if (r < 0)
395 return r;
396
f9fa32f0 397#if HAVE_SELINUX || ENABLE_SMACK
f1d19aa4
LP
398 /* Nodes in devtmpfs and /run need to be manually updated for
399 * the appropriate labels, after mounting. The other virtual
400 * API file systems like /sys and /proc do not need that, they
401 * use the same label for all their files. */
0b3325e7
LP
402 if (loaded_policy) {
403 usec_t before_relabel, after_relabel;
404 char timespan[FORMAT_TIMESPAN_MAX];
405
406 before_relabel = now(CLOCK_MONOTONIC);
407
edb49778 408 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
cacf980e 409 nftw("/dev/shm", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
edb49778 410 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
0b3325e7 411
8739f23e
KN
412 /* Temporarily remount the root cgroup filesystem to give it a proper label. */
413 r = cg_all_unified();
414 if (r == 0) {
415 (void) mount(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT, NULL);
416 label_fix("/sys/fs/cgroup", false, false);
417 nftw("/sys/fs/cgroup", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
418 (void) mount(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT|MS_RDONLY, NULL);
419 } else if (r < 0)
420 return log_error_errno(r, "Failed to determine whether we are in all unified mode: %m");
421
0b3325e7
LP
422 after_relabel = now(CLOCK_MONOTONIC);
423
8739f23e 424 log_info("Relabelled /dev, /run and /sys/fs/cgroup in %s.",
2fa4092c 425 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
3bbecb2f 426 }
0fff82e5 427#endif
1829dc9d 428
5c0532d1 429 /* Create a few default symlinks, which are normally created
f1d19aa4 430 * by udevd, but some scripts might need them before we start
5c0532d1 431 * udevd. */
03cfe0d5 432 dev_setup(NULL, UID_INVALID, GID_INVALID);
5c0532d1 433
dee22f39
LP
434 /* Mark the root directory as shared in regards to mount propagation. The kernel defaults to "private", but we
435 * think it makes more sense to have a default of "shared" so that nspawn and the container tools work out of
436 * the box. If specific setups need other settings they can reset the propagation mode to private if
437 * needed. Note that we set this only when we are invoked directly by the kernel. If we are invoked by a
438 * container manager we assume the container manager knows what it is doing (for example, because it set up
439 * some directories with different propagation modes). */
75f86906 440 if (detect_container() <= 0)
c481f78b 441 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
56f64d95 442 log_warning_errno(errno, "Failed to set up the root directory for shared mount propagation: %m");
b3ac5f8c 443
dee22f39
LP
444 /* Create a few directories we always want around, Note that sd_booted() checks for /run/systemd/system, so
445 * this mkdir really needs to stay for good, otherwise software that copied sd-daemon.c into their sources will
446 * misdetect systemd. */
c4b41707
AP
447 (void) mkdir_label("/run/systemd", 0755);
448 (void) mkdir_label("/run/systemd/system", 0755);
dee22f39 449
c4b41707 450 /* Set up inaccessible items */
dee22f39 451 (void) mkdir_label("/run/systemd/inaccessible", 0000);
c4b41707
AP
452 (void) mknod("/run/systemd/inaccessible/reg", S_IFREG | 0000, 0);
453 (void) mkdir_label("/run/systemd/inaccessible/dir", 0000);
454 (void) mknod("/run/systemd/inaccessible/chr", S_IFCHR | 0000, makedev(0, 0));
455 (void) mknod("/run/systemd/inaccessible/blk", S_IFBLK | 0000, makedev(0, 0));
456 (void) mkfifo("/run/systemd/inaccessible/fifo", 0000);
457 (void) mknod("/run/systemd/inaccessible/sock", S_IFSOCK | 0000, 0);
b925e726 458
0c85a4f3 459 return 0;
8e274523 460}