]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/mount-setup.c
sd-bus: sync with kdbus upstream (ABI break)
[thirdparty/systemd.git] / src / core / mount-setup.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <errno.h>
24 #include <sys/stat.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <libgen.h>
28 #include <assert.h>
29 #include <unistd.h>
30 #include <ftw.h>
31
32 #include "mount-setup.h"
33 #include "dev-setup.h"
34 #include "log.h"
35 #include "macro.h"
36 #include "util.h"
37 #include "label.h"
38 #include "set.h"
39 #include "strv.h"
40 #include "mkdir.h"
41 #include "path-util.h"
42 #include "missing.h"
43 #include "virt.h"
44 #include "efivars.h"
45 #include "smack-util.h"
46 #include "def.h"
47
/* Behaviour flags for a MountPoint entry; the values are bits and
 * may be OR-ed together. */
typedef enum MountMode {
        MNT_NONE         = 0,       /* no special handling */
        MNT_FATAL        = 1 << 0,  /* a failing mount() is reported as an error by mount_one() */
        MNT_IN_CONTAINER = 1 << 1,  /* mount_one() mounts this entry inside a container, too */
} MountMode;
53
54 typedef struct MountPoint {
55 const char *what;
56 const char *where;
57 const char *type;
58 const char *options;
59 unsigned long flags;
60 bool (*condition_fn)(void);
61 MountMode mode;
62 } MountPoint;
63
/* Number of leading mount_table[] entries that mount_setup_early()
 * mounts. The first three we might need before SELinux is up, the
 * fourth (securityfs) is needed by IMA to load a custom policy. When
 * SMACK is enabled smackfs is needed early as well, which makes it
 * five. All remaining entries can wait until SELinux and IMA are
 * loaded. */
#ifndef HAVE_SMACK
#define N_EARLY_MOUNT 4
#else
#define N_EARLY_MOUNT 5
#endif
73
74 static const MountPoint mount_table[] = {
75 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
76 NULL, MNT_FATAL|MNT_IN_CONTAINER },
77 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
78 NULL, MNT_FATAL|MNT_IN_CONTAINER },
79 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
80 NULL, MNT_FATAL|MNT_IN_CONTAINER },
81 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
82 NULL, MNT_NONE },
83 #ifdef HAVE_SMACK
84 { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
85 mac_smack_use, MNT_FATAL },
86 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
87 mac_smack_use, MNT_FATAL },
88 #endif
89 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
90 NULL, MNT_FATAL|MNT_IN_CONTAINER },
91 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
92 NULL, MNT_IN_CONTAINER },
93 #ifdef HAVE_SMACK
94 { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
95 mac_smack_use, MNT_FATAL },
96 #endif
97 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
98 NULL, MNT_FATAL|MNT_IN_CONTAINER },
99 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
100 NULL, MNT_FATAL|MNT_IN_CONTAINER },
101 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
102 NULL, MNT_IN_CONTAINER },
103 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
104 NULL, MNT_FATAL|MNT_IN_CONTAINER },
105 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
106 NULL, MNT_NONE },
107 #ifdef ENABLE_EFI
108 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
109 is_efi_boot, MNT_NONE },
110 #endif
111 };
112
113 static const MountPoint mount_table_late[] = {
114 #ifdef ENABLE_KDBUS
115 { "kdbusfs", "/sys/fs/kdbus", "kdbusfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
116 NULL, MNT_IN_CONTAINER },
117 #endif
118 };
119
/* API file systems and bind mounts that other software might set
 * up. We do not mount them ourselves; they are listed only so that
 * mount_point_ignore() can recognize them. The list is a sequence of
 * NUL-terminated strings, ended by an empty string. */

static const char ignore_paths[] =
        /* SELinux file systems */
        "/sys/fs/selinux\0"
        "/selinux\0"

        /* Legacy cgroup mount points */
        "/dev/cgroup\0"
        "/cgroup\0"

        /* Legacy kernel file system */
        "/proc/bus/usb\0"

        /* Container bind mounts */
        "/proc/sys\0"
        "/dev/console\0"
        "/proc/kmsg\0";
136
137 bool mount_point_is_api(const char *path) {
138 unsigned i;
139
140 /* Checks if this mount point is considered "API", and hence
141 * should be ignored */
142
143 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
144 if (path_equal(path, mount_table[i].where))
145 return true;
146
147 return path_startswith(path, "/sys/fs/cgroup/");
148 }
149
150 bool mount_point_ignore(const char *path) {
151 const char *i;
152
153 NULSTR_FOREACH(i, ignore_paths)
154 if (path_equal(path, i))
155 return true;
156
157 return false;
158 }
159
160 static int mount_one(const MountPoint *p, bool relabel) {
161 int r;
162
163 assert(p);
164
165 if (p->condition_fn && !p->condition_fn())
166 return 0;
167
168 /* Relabel first, just in case */
169 if (relabel)
170 label_fix(p->where, true, true);
171
172 r = path_is_mount_point(p->where, true);
173 if (r < 0)
174 return r;
175
176 if (r > 0)
177 return 0;
178
179 /* Skip securityfs in a container */
180 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
181 return 0;
182
183 /* The access mode here doesn't really matter too much, since
184 * the mounted file system will take precedence anyway. */
185 if (relabel)
186 mkdir_p_label(p->where, 0755);
187 else
188 mkdir_p(p->where, 0755);
189
190 log_debug("Mounting %s to %s of type %s with options %s.",
191 p->what,
192 p->where,
193 p->type,
194 strna(p->options));
195
196 if (mount(p->what,
197 p->where,
198 p->type,
199 p->flags,
200 p->options) < 0) {
201 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s at %s: %m", p->type, p->where);
202 return (p->mode & MNT_FATAL) ? -errno : 0;
203 }
204
205 /* Relabel again, since we now mounted something fresh here */
206 if (relabel)
207 label_fix(p->where, false, false);
208
209 return 1;
210 }
211
212 int mount_setup_early(void) {
213 unsigned i;
214 int r = 0;
215
216 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
217
218 /* Do a minimal mount of /proc and friends to enable the most
219 * basic stuff, such as SELinux */
220 for (i = 0; i < N_EARLY_MOUNT; i ++) {
221 int j;
222
223 j = mount_one(mount_table + i, false);
224 if (r == 0)
225 r = j;
226 }
227
228 return r;
229 }
230
231 int mount_setup_late(void) {
232 unsigned i;
233 int r = 0;
234
235 for (i = 0; i < ELEMENTSOF(mount_table_late); i ++) {
236 int j;
237
238 j = mount_one(mount_table_late + i, false);
239 if (r == 0)
240 r = j;
241 }
242
243 return r;
244 }
245
246 int mount_cgroup_controllers(char ***join_controllers) {
247 _cleanup_set_free_free_ Set *controllers = NULL;
248 _cleanup_fclose_ FILE *f;
249 char buf[LINE_MAX];
250 int r;
251
252 /* Mount all available cgroup controllers that are built into the kernel. */
253
254 f = fopen("/proc/cgroups", "re");
255 if (!f) {
256 log_error("Failed to enumerate cgroup controllers: %m");
257 return 0;
258 }
259
260 controllers = set_new(&string_hash_ops);
261 if (!controllers)
262 return log_oom();
263
264 /* Ignore the header line */
265 (void) fgets(buf, sizeof(buf), f);
266
267 for (;;) {
268 char *controller;
269 int enabled = 0;
270
271 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
272
273 if (feof(f))
274 break;
275
276 log_error("Failed to parse /proc/cgroups.");
277 return -EIO;
278 }
279
280 if (!enabled) {
281 free(controller);
282 continue;
283 }
284
285 r = set_consume(controllers, controller);
286 if (r < 0) {
287 log_error("Failed to add controller to set.");
288 return r;
289 }
290 }
291
292 for (;;) {
293 _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
294 MountPoint p = {
295 .what = "cgroup",
296 .type = "cgroup",
297 .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
298 .mode = MNT_IN_CONTAINER,
299 };
300 char ***k = NULL;
301
302 controller = set_steal_first(controllers);
303 if (!controller)
304 break;
305
306 if (join_controllers)
307 for (k = join_controllers; *k; k++)
308 if (strv_find(*k, controller))
309 break;
310
311 if (k && *k) {
312 char **i, **j;
313
314 for (i = *k, j = *k; *i; i++) {
315
316 if (!streq(*i, controller)) {
317 _cleanup_free_ char *t;
318
319 t = set_remove(controllers, *i);
320 if (!t) {
321 free(*i);
322 continue;
323 }
324 }
325
326 *(j++) = *i;
327 }
328
329 *j = NULL;
330
331 options = strv_join(*k, ",");
332 if (!options)
333 return log_oom();
334 } else {
335 options = controller;
336 controller = NULL;
337 }
338
339 where = strappend("/sys/fs/cgroup/", options);
340 if (!where)
341 return log_oom();
342
343 p.where = where;
344 p.options = options;
345
346 r = mount_one(&p, true);
347 if (r < 0)
348 return r;
349
350 if (r > 0 && k && *k) {
351 char **i;
352
353 for (i = *k; *i; i++) {
354 _cleanup_free_ char *t = NULL;
355
356 t = strappend("/sys/fs/cgroup/", *i);
357 if (!t)
358 return log_oom();
359
360 r = symlink(options, t);
361 if (r < 0 && errno != EEXIST) {
362 log_error("Failed to create symlink %s: %m", t);
363 return -errno;
364 }
365 }
366 }
367 }
368
369 /* Now that we mounted everything, let's make the tmpfs the
370 * cgroup file systems are mounted into read-only. */
371 mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
372
373 return 0;
374 }
375
#if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
/* nftw() callback that relabels every visited path below the root of
 * the walk. The root itself (level 0) is left alone, and the contents
 * of /run/initramfs are not descended into. Always tells nftw() to go
 * on, either with FTW_CONTINUE or with FTW_SKIP_SUBTREE. */
static int nftw_cb(
                const char *fpath,
                const struct stat *sb,
                int tflag,
                struct FTW *ftwbuf) {

        /* No need to label /dev twice in a row... */
        if (_unlikely_(ftwbuf->level == 0))
                return FTW_CONTINUE;

        label_fix(fpath, false, false);

        /* /run/initramfs is static data and big, no need to
         * dynamically relabel its contents at boot... (the directory
         * itself has been relabelled just above) */
        if (_unlikely_(ftwbuf->level == 1 &&
                       tflag == FTW_D &&
                       streq(fpath, "/run/initramfs")))
                return FTW_SKIP_SUBTREE;

        return FTW_CONTINUE;
}
#endif
399
400 int mount_setup(bool loaded_policy) {
401 int r;
402 unsigned i;
403
404 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
405 r = mount_one(mount_table + i, true);
406
407 if (r < 0)
408 return r;
409 }
410
411 #if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
412 /* Nodes in devtmpfs and /run need to be manually updated for
413 * the appropriate labels, after mounting. The other virtual
414 * API file systems like /sys and /proc do not need that, they
415 * use the same label for all their files. */
416 if (loaded_policy) {
417 usec_t before_relabel, after_relabel;
418 char timespan[FORMAT_TIMESPAN_MAX];
419
420 before_relabel = now(CLOCK_MONOTONIC);
421
422 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
423 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
424
425 after_relabel = now(CLOCK_MONOTONIC);
426
427 log_info("Relabelled /dev and /run in %s.",
428 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
429 }
430 #endif
431
432 /* Create a few default symlinks, which are normally created
433 * by udevd, but some scripts might need them before we start
434 * udevd. */
435 dev_setup(NULL);
436
437 /* Mark the root directory as shared in regards to mount
438 * propagation. The kernel defaults to "private", but we think
439 * it makes more sense to have a default of "shared" so that
440 * nspawn and the container tools work out of the box. If
441 * specific setups need other settings they can reset the
442 * propagation mode to private if needed. */
443 if (detect_container(NULL) <= 0)
444 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
445 log_warning("Failed to set up the root directory for shared mount propagation: %m");
446
447 /* Create a few directories we always want around, Note that
448 * sd_booted() checks for /run/systemd/system, so this mkdir
449 * really needs to stay for good, otherwise software that
450 * copied sd-daemon.c into their sources will misdetect
451 * systemd. */
452 mkdir_label("/run/systemd", 0755);
453 mkdir_label("/run/systemd/system", 0755);
454 mkdir_label("/run/systemd/inaccessible", 0000);
455
456 return 0;
457 }