]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/mount-setup.c
networkd: auto promote links if "promote_secondaries" is unset (#7167)
[thirdparty/systemd.git] / src / core / mount-setup.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
8e274523
LP
2/***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 15 Lesser General Public License for more details.
8e274523 16
5430f7f2 17 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19***/
20
8e274523 21#include <errno.h>
cf0fbc49 22#include <ftw.h>
8e274523 23#include <stdlib.h>
cf0fbc49 24#include <sys/mount.h>
5c0532d1 25#include <unistd.h>
8e274523 26
b5efdb8a 27#include "alloc-util.h"
64824462 28#include "bus-util.h"
4349cd7c
LP
29#include "cgroup-util.h"
30#include "dev-setup.h"
31#include "efivars.h"
e07aefbd 32#include "fileio.h"
c4b41707 33#include "fs-util.h"
4349cd7c 34#include "label.h"
8e274523 35#include "log.h"
c9af1080 36#include "macro.h"
4349cd7c 37#include "missing.h"
49e942b2 38#include "mkdir.h"
4349cd7c
LP
39#include "mount-setup.h"
40#include "mount-util.h"
9eb977db 41#include "path-util.h"
4349cd7c 42#include "set.h"
8552b176 43#include "smack-util.h"
4349cd7c 44#include "strv.h"
ee104e11 45#include "user-util.h"
4349cd7c
LP
46#include "util.h"
47#include "virt.h"
bef2733f 48
/* Per-entry behaviour flags consumed by mount_one(). */
typedef enum MountMode {
        /* No special handling. */
        MNT_NONE           = 0,

        /* Failures for this entry are logged at error level and propagated to the caller;
         * without it they are logged at debug level and swallowed. */
        MNT_FATAL          = 1 << 0,

        /* The entry is also mounted when running inside a container; entries without this
         * flag are skipped there. */
        MNT_IN_CONTAINER   = 1 << 1,

        /* After mounting, verify the mount point is writable and unmount it again if not. */
        MNT_CHECK_WRITABLE = 1 << 2,
} MountMode;
55
ca714c0e
LP
56typedef struct MountPoint {
57 const char *what;
58 const char *where;
59 const char *type;
60 const char *options;
61 unsigned long flags;
6aa220e0
KS
62 bool (*condition_fn)(void);
63 MountMode mode;
ca714c0e
LP
64} MountPoint;
65
/* Number of leading mount table entries that are mounted early. The first three entries
 * we might need before SELinux is up. The fourth (securityfs) is needed by IMA to load a
 * custom policy. All remaining entries can be delayed until SELinux and IMA are loaded.
 * When SMACK is enabled we need smackfs as well, which makes it five. */
#if ENABLE_SMACK
#  define N_EARLY_MOUNT 5
#else
#  define N_EARLY_MOUNT 4
#endif
4ef31082 75
ca714c0e 76static const MountPoint mount_table[] = {
68d4c452
LP
77 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
78 NULL, MNT_FATAL|MNT_IN_CONTAINER },
79 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
80 NULL, MNT_FATAL|MNT_IN_CONTAINER },
81 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
82 NULL, MNT_FATAL|MNT_IN_CONTAINER },
83 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
84 NULL, MNT_NONE },
f9fa32f0 85#if ENABLE_SMACK
68d4c452
LP
86 { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV,
87 mac_smack_use, MNT_FATAL },
88 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
89 mac_smack_use, MNT_FATAL },
d407c940 90#endif
68d4c452
LP
91 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
92 NULL, MNT_FATAL|MNT_IN_CONTAINER },
93 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
94 NULL, MNT_IN_CONTAINER },
f9fa32f0 95#if ENABLE_SMACK
68d4c452
LP
96 { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
97 mac_smack_use, MNT_FATAL },
d407c940 98#endif
68d4c452
LP
99 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
100 NULL, MNT_FATAL|MNT_IN_CONTAINER },
4095205e 101 { "cgroup", "/sys/fs/cgroup", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV,
2d56b80a 102 cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
09961995 103 { "cgroup", "/sys/fs/cgroup", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
2d56b80a 104 cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
68d4c452 105 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
efdb0237 106 cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
4095205e 107 { "cgroup", "/sys/fs/cgroup/unified", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV,
e07aefbd 108 cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
2977724b 109 { "cgroup", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
e07aefbd 110 cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
68d4c452 111 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
2977724b 112 cg_is_legacy_wanted, MNT_IN_CONTAINER },
68d4c452 113 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
2977724b 114 cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
68d4c452
LP
115 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
116 NULL, MNT_NONE },
349cc4a5 117#if ENABLE_EFI
68d4c452
LP
118 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
119 is_efi_boot, MNT_NONE },
c06bf414 120#endif
63cc4c31
DM
121};
122
/* API file systems that other software might mount. They are only listed here so that we
 * know to ignore them. The list is a sequence of NUL-terminated paths, ended by an empty
 * string (the implicit terminator of the literal). */
static const char ignore_paths[] =
        /* SELinux file systems */
        "/sys/fs/selinux\0"

        /* Container bind mounts */
        "/proc/sys\0"
        "/dev/console\0"
        "/proc/kmsg\0";
949c6510 133
dad08730
LP
134bool mount_point_is_api(const char *path) {
135 unsigned i;
136
137 /* Checks if this mount point is considered "API", and hence
138 * should be ignored */
139
ca714c0e 140 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
449ddb2d 141 if (path_equal(path, mount_table[i].where))
dad08730
LP
142 return true;
143
57f2a956
KS
144 return path_startswith(path, "/sys/fs/cgroup/");
145}
146
147bool mount_point_ignore(const char *path) {
eaeb18db 148 const char *i;
57f2a956 149
eaeb18db
LP
150 NULSTR_FOREACH(i, ignore_paths)
151 if (path_equal(path, i))
949c6510
LP
152 return true;
153
57f2a956 154 return false;
dad08730
LP
155}
156
4ef31082 157static int mount_one(const MountPoint *p, bool relabel) {
8e274523
LP
158 int r;
159
ca714c0e 160 assert(p);
8e274523 161
6aa220e0
KS
162 if (p->condition_fn && !p->condition_fn())
163 return 0;
164
51b4af2c 165 /* Relabel first, just in case */
4ef31082 166 if (relabel)
1411b094 167 (void) label_fix(p->where, true, true);
51b4af2c 168
e1873695 169 r = path_is_mount_point(p->where, NULL, AT_SYMLINK_FOLLOW);
1411b094
LP
170 if (r < 0 && r != -ENOENT) {
171 log_full_errno((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, r, "Failed to determine whether %s is a mount point: %m", p->where);
172 return (p->mode & MNT_FATAL) ? r : 0;
173 }
8e274523 174 if (r > 0)
51b4af2c 175 return 0;
8e274523 176
c481f78b 177 /* Skip securityfs in a container */
75f86906 178 if (!(p->mode & MNT_IN_CONTAINER) && detect_container() > 0)
c481f78b
LP
179 return 0;
180
a04f58d6
LP
181 /* The access mode here doesn't really matter too much, since
182 * the mounted file system will take precedence anyway. */
c4bfd169 183 if (relabel)
1411b094 184 (void) mkdir_p_label(p->where, 0755);
c4bfd169 185 else
1411b094 186 (void) mkdir_p(p->where, 0755);
a04f58d6 187
8e274523 188 log_debug("Mounting %s to %s of type %s with options %s.",
ca714c0e
LP
189 p->what,
190 p->where,
191 p->type,
192 strna(p->options));
193
194 if (mount(p->what,
195 p->where,
196 p->type,
197 p->flags,
198 p->options) < 0) {
1411b094 199 log_full_errno((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, errno, "Failed to mount %s at %s: %m", p->type, p->where);
6aa220e0 200 return (p->mode & MNT_FATAL) ? -errno : 0;
8e274523
LP
201 }
202
51b4af2c 203 /* Relabel again, since we now mounted something fresh here */
4ef31082 204 if (relabel)
1411b094 205 (void) label_fix(p->where, false, false);
5275d3c1 206
e07aefbd
CB
207 if (p->mode & MNT_CHECK_WRITABLE) {
208 r = access(p->where, W_OK);
209 if (r < 0) {
210 (void) umount(p->where);
211 return (p->mode & MNT_FATAL) ? r : 0;
212 }
213 }
214
0c85a4f3 215 return 1;
8e274523
LP
216}
217
400fac06 218static int mount_points_setup(unsigned n, bool loaded_policy) {
4ef31082
LP
219 unsigned i;
220 int r = 0;
221
400fac06 222 for (i = 0; i < n; i ++) {
4ef31082
LP
223 int j;
224
400fac06 225 j = mount_one(mount_table + i, loaded_policy);
7ff307bc 226 if (j != 0 && r >= 0)
4ef31082
LP
227 r = j;
228 }
229
230 return r;
231}
232
400fac06
AK
233int mount_setup_early(void) {
234 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
235
236 /* Do a minimal mount of /proc and friends to enable the most
237 * basic stuff, such as SELinux */
238 return mount_points_setup(N_EARLY_MOUNT, false);
239}
240
0c85a4f3 241int mount_cgroup_controllers(char ***join_controllers) {
a6b26d90 242 _cleanup_set_free_free_ Set *controllers = NULL;
a641dcd9 243 int r;
2076ca54 244
efdb0237
LP
245 if (!cg_is_legacy_wanted())
246 return 0;
247
670802d4 248 /* Mount all available cgroup controllers that are built into the kernel. */
2076ca54 249
6925a0de 250 r = cg_kernel_controllers(&controllers);
b12afc8c
LP
251 if (r < 0)
252 return log_error_errno(r, "Failed to enumerate cgroup controllers: %m");
0c85a4f3
LP
253
254 for (;;) {
a641dcd9 255 _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
a6b26d90
ZJS
256 MountPoint p = {
257 .what = "cgroup",
258 .type = "cgroup",
259 .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
260 .mode = MNT_IN_CONTAINER,
261 };
0c85a4f3
LP
262 char ***k = NULL;
263
264 controller = set_steal_first(controllers);
265 if (!controller)
266 break;
267
268 if (join_controllers)
269 for (k = join_controllers; *k; k++)
270 if (strv_find(*k, controller))
271 break;
272
273 if (k && *k) {
274 char **i, **j;
275
276 for (i = *k, j = *k; *i; i++) {
277
278 if (!streq(*i, controller)) {
a641dcd9 279 _cleanup_free_ char *t;
0c85a4f3
LP
280
281 t = set_remove(controllers, *i);
282 if (!t) {
283 free(*i);
284 continue;
285 }
0c85a4f3
LP
286 }
287
288 *(j++) = *i;
289 }
290
291 *j = NULL;
292
293 options = strv_join(*k, ",");
a6b26d90
ZJS
294 if (!options)
295 return log_oom();
0c85a4f3
LP
296 } else {
297 options = controller;
298 controller = NULL;
299 }
300
a641dcd9
LP
301 where = strappend("/sys/fs/cgroup/", options);
302 if (!where)
303 return log_oom();
304
305 p.where = where;
0c85a4f3 306 p.options = options;
2076ca54 307
4ef31082 308 r = mount_one(&p, true);
a6b26d90
ZJS
309 if (r < 0)
310 return r;
0c85a4f3
LP
311
312 if (r > 0 && k && *k) {
313 char **i;
314
315 for (i = *k; *i; i++) {
a641dcd9
LP
316 _cleanup_free_ char *t = NULL;
317
318 t = strappend("/sys/fs/cgroup/", *i);
319 if (!t)
320 return log_oom();
0c85a4f3
LP
321
322 r = symlink(options, t);
ea2b93a8 323 if (r >= 0) {
f8c1a81c 324#ifdef SMACK_RUN_LABEL
ea2b93a8
PO
325 _cleanup_free_ char *src;
326 src = strappend("/sys/fs/cgroup/", options);
327 if (!src)
328 return log_oom();
329 r = mac_smack_copy(t, src);
330 if (r < 0 && r != -EOPNOTSUPP)
331 return log_error_errno(r, "Failed to copy smack label from %s to %s: %m", src, t);
f8c1a81c 332#endif
ea2b93a8
PO
333 } else if (errno != EEXIST)
334 return log_error_errno(errno, "Failed to create symlink %s: %m", t);
0c85a4f3
LP
335 }
336 }
2076ca54
LP
337 }
338
679142ce
LP
339 /* Now that we mounted everything, let's make the tmpfs the
340 * cgroup file systems are mounted into read-only. */
b12afc8c 341 (void) mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
679142ce 342
a6b26d90 343 return 0;
2076ca54
LP
344}
345
#if HAVE_SELINUX || ENABLE_SMACK
/* nftw() callback that fixes the security label of every visited path except the root of
 * the walk, and does not descend into /run/initramfs. Labelling is best-effort: failures
 * are ignored. Returns FTW_CONTINUE or FTW_SKIP_SUBTREE, hence the walk must be started
 * with FTW_ACTIONRETVAL. */
static int nftw_cb(
                const char *fpath,
                const struct stat *sb,
                int tflag,
                struct FTW *ftwbuf) {

        /* No need to label /dev twice in a row... */
        if (_unlikely_(ftwbuf->level == 0))
                return FTW_CONTINUE;

        (void) label_fix(fpath, false, false);

        /* /run/initramfs is static data and big, no need to
         * dynamically relabel its contents at boot... */
        if (_unlikely_(ftwbuf->level == 1 &&
                       tflag == FTW_D &&
                       streq(fpath, "/run/initramfs")))
                return FTW_SKIP_SUBTREE;

        return FTW_CONTINUE;
}
#endif
1829dc9d 369
0b3325e7 370int mount_setup(bool loaded_policy) {
68d4c452 371 int r = 0;
8e274523 372
400fac06 373 r = mount_points_setup(ELEMENTSOF(mount_table), loaded_policy);
68d4c452
LP
374 if (r < 0)
375 return r;
376
f9fa32f0 377#if HAVE_SELINUX || ENABLE_SMACK
f1d19aa4
LP
378 /* Nodes in devtmpfs and /run need to be manually updated for
379 * the appropriate labels, after mounting. The other virtual
380 * API file systems like /sys and /proc do not need that, they
381 * use the same label for all their files. */
0b3325e7
LP
382 if (loaded_policy) {
383 usec_t before_relabel, after_relabel;
384 char timespan[FORMAT_TIMESPAN_MAX];
385
386 before_relabel = now(CLOCK_MONOTONIC);
387
edb49778 388 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
cacf980e 389 nftw("/dev/shm", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
edb49778 390 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
0b3325e7
LP
391
392 after_relabel = now(CLOCK_MONOTONIC);
393
394 log_info("Relabelled /dev and /run in %s.",
2fa4092c 395 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
3bbecb2f 396 }
0fff82e5 397#endif
1829dc9d 398
5c0532d1 399 /* Create a few default symlinks, which are normally created
f1d19aa4 400 * by udevd, but some scripts might need them before we start
5c0532d1 401 * udevd. */
03cfe0d5 402 dev_setup(NULL, UID_INVALID, GID_INVALID);
5c0532d1 403
dee22f39
LP
404 /* Mark the root directory as shared in regards to mount propagation. The kernel defaults to "private", but we
405 * think it makes more sense to have a default of "shared" so that nspawn and the container tools work out of
406 * the box. If specific setups need other settings they can reset the propagation mode to private if
407 * needed. Note that we set this only when we are invoked directly by the kernel. If we are invoked by a
408 * container manager we assume the container manager knows what it is doing (for example, because it set up
409 * some directories with different propagation modes). */
75f86906 410 if (detect_container() <= 0)
c481f78b 411 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
56f64d95 412 log_warning_errno(errno, "Failed to set up the root directory for shared mount propagation: %m");
b3ac5f8c 413
dee22f39
LP
414 /* Create a few directories we always want around, Note that sd_booted() checks for /run/systemd/system, so
415 * this mkdir really needs to stay for good, otherwise software that copied sd-daemon.c into their sources will
416 * misdetect systemd. */
c4b41707
AP
417 (void) mkdir_label("/run/systemd", 0755);
418 (void) mkdir_label("/run/systemd/system", 0755);
dee22f39 419
c4b41707 420 /* Set up inaccessible items */
dee22f39 421 (void) mkdir_label("/run/systemd/inaccessible", 0000);
c4b41707
AP
422 (void) mknod("/run/systemd/inaccessible/reg", S_IFREG | 0000, 0);
423 (void) mkdir_label("/run/systemd/inaccessible/dir", 0000);
424 (void) mknod("/run/systemd/inaccessible/chr", S_IFCHR | 0000, makedev(0, 0));
425 (void) mknod("/run/systemd/inaccessible/blk", S_IFBLK | 0000, makedev(0, 0));
426 (void) mkfifo("/run/systemd/inaccessible/fifo", 0000);
427 (void) mknod("/run/systemd/inaccessible/sock", S_IFSOCK | 0000, 0);
b925e726 428
0c85a4f3 429 return 0;
8e274523 430}