]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/mount-setup.c
23a66d2e957a5fc5d4f50870791a082ea7108cd8
[thirdparty/systemd.git] / src / core / mount-setup.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <errno.h>
24 #include <sys/stat.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <libgen.h>
28 #include <assert.h>
29 #include <unistd.h>
30 #include <ftw.h>
31
32 #include "mount-setup.h"
33 #include "dev-setup.h"
34 #include "log.h"
35 #include "macro.h"
36 #include "util.h"
37 #include "label.h"
38 #include "set.h"
39 #include "strv.h"
40 #include "mkdir.h"
41 #include "path-util.h"
42 #include "missing.h"
43 #include "virt.h"
44 #include "efivars.h"
45 #include "smack-util.h"
46 #include "def.h"
47
/* Policy bits attached to every MountPoint; combine them with bitwise OR. */
typedef enum MountMode {
        /* No special handling: failures are tolerated and the entry
         * is skipped when running inside a container. */
        MNT_NONE         = 0,

        /* A failing mount(2) is logged as an error and reported to
         * the caller of mount_one() as a negative errno value. */
        MNT_FATAL        = 1 << 0,

        /* The entry is mounted inside containers as well. */
        MNT_IN_CONTAINER = 1 << 1,
} MountMode;
53
54 typedef struct MountPoint {
55 const char *what;
56 const char *where;
57 const char *type;
58 const char *options;
59 unsigned long flags;
60 bool (*condition_fn)(void);
61 MountMode mode;
62 } MountPoint;
63
/* Number of leading mount_table[] entries that mount_setup_early()
 * handles. The first three might be needed before SELinux is up, the
 * fourth (securityfs) is needed by IMA to load a custom policy. With
 * SMACK support compiled in, smackfs is needed as well and becomes
 * the fifth. Everything after that can wait until SELinux and IMA
 * are loaded. */
#ifdef HAVE_SMACK
#  define N_EARLY_MOUNT 5
#else
#  define N_EARLY_MOUNT 4
#endif
73
74 static const MountPoint mount_table[] = {
75 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
76 NULL, MNT_FATAL|MNT_IN_CONTAINER },
77 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
78 NULL, MNT_FATAL|MNT_IN_CONTAINER },
79 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
80 NULL, MNT_FATAL|MNT_IN_CONTAINER },
81 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
82 NULL, MNT_NONE },
83 #ifdef HAVE_SMACK
84 { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
85 use_smack, MNT_FATAL },
86 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
87 use_smack, MNT_FATAL },
88 #endif
89 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
90 NULL, MNT_FATAL|MNT_IN_CONTAINER },
91 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
92 NULL, MNT_IN_CONTAINER },
93 #ifdef HAVE_SMACK
94 { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
95 use_smack, MNT_FATAL },
96 #endif
97 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
98 NULL, MNT_FATAL|MNT_IN_CONTAINER },
99 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
100 NULL, MNT_FATAL|MNT_IN_CONTAINER },
101 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
102 NULL, MNT_IN_CONTAINER },
103 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
104 NULL, MNT_FATAL|MNT_IN_CONTAINER },
105 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
106 NULL, MNT_NONE },
107 #ifdef ENABLE_EFI
108 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
109 is_efi_boot, MNT_NONE },
110 #endif
111 };
112
/* API file systems that other software may mount for us. They are
 * only listed so that we know to leave them alone. The list is a
 * sequence of NUL-terminated strings, ended by an empty string. */
static const char ignore_paths[] =
        "/sys/fs/selinux\0"     /* SELinux file system */
        "/selinux\0"            /* SELinux file system (old location) */
        "/dev/cgroup\0"         /* legacy cgroup mount point */
        "/cgroup\0"             /* legacy cgroup mount point */
        "/proc/bus/usb\0"       /* legacy kernel file system */
        "/proc/sys\0"           /* container bind mount */
        "/dev/console\0"        /* container bind mount */
        "/proc/kmsg\0";         /* container bind mount */
129
130 bool mount_point_is_api(const char *path) {
131 unsigned i;
132
133 /* Checks if this mount point is considered "API", and hence
134 * should be ignored */
135
136 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
137 if (path_equal(path, mount_table[i].where))
138 return true;
139
140 return path_startswith(path, "/sys/fs/cgroup/");
141 }
142
143 bool mount_point_ignore(const char *path) {
144 const char *i;
145
146 NULSTR_FOREACH(i, ignore_paths)
147 if (path_equal(path, i))
148 return true;
149
150 return false;
151 }
152
153 static int mount_one(const MountPoint *p, bool relabel) {
154 int r;
155
156 assert(p);
157
158 if (p->condition_fn && !p->condition_fn())
159 return 0;
160
161 /* Relabel first, just in case */
162 if (relabel)
163 label_fix(p->where, true, true);
164
165 r = path_is_mount_point(p->where, true);
166 if (r < 0)
167 return r;
168
169 if (r > 0)
170 return 0;
171
172 /* Skip securityfs in a container */
173 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
174 return 0;
175
176 /* The access mode here doesn't really matter too much, since
177 * the mounted file system will take precedence anyway. */
178 if (relabel)
179 mkdir_p_label(p->where, 0755);
180 else
181 mkdir_p(p->where, 0755);
182
183 log_debug("Mounting %s to %s of type %s with options %s.",
184 p->what,
185 p->where,
186 p->type,
187 strna(p->options));
188
189 if (mount(p->what,
190 p->where,
191 p->type,
192 p->flags,
193 p->options) < 0) {
194 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s at %s: %m", p->type, p->where);
195 return (p->mode & MNT_FATAL) ? -errno : 0;
196 }
197
198 /* Relabel again, since we now mounted something fresh here */
199 if (relabel)
200 label_fix(p->where, false, false);
201
202 return 1;
203 }
204
205 int mount_setup_early(void) {
206 unsigned i;
207 int r = 0;
208
209 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
210
211 /* Do a minimal mount of /proc and friends to enable the most
212 * basic stuff, such as SELinux */
213 for (i = 0; i < N_EARLY_MOUNT; i ++) {
214 int j;
215
216 j = mount_one(mount_table + i, false);
217 if (r == 0)
218 r = j;
219 }
220
221 return r;
222 }
223
224 int mount_cgroup_controllers(char ***join_controllers) {
225 _cleanup_set_free_free_ Set *controllers = NULL;
226 _cleanup_fclose_ FILE *f;
227 char buf[LINE_MAX];
228 int r;
229
230 /* Mount all available cgroup controllers that are built into the kernel. */
231
232 f = fopen("/proc/cgroups", "re");
233 if (!f) {
234 log_error("Failed to enumerate cgroup controllers: %m");
235 return 0;
236 }
237
238 controllers = set_new(&string_hash_ops);
239 if (!controllers)
240 return log_oom();
241
242 /* Ignore the header line */
243 (void) fgets(buf, sizeof(buf), f);
244
245 for (;;) {
246 char *controller;
247 int enabled = 0;
248
249 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
250
251 if (feof(f))
252 break;
253
254 log_error("Failed to parse /proc/cgroups.");
255 return -EIO;
256 }
257
258 if (!enabled) {
259 free(controller);
260 continue;
261 }
262
263 r = set_consume(controllers, controller);
264 if (r < 0) {
265 log_error("Failed to add controller to set.");
266 return r;
267 }
268 }
269
270 for (;;) {
271 _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
272 MountPoint p = {
273 .what = "cgroup",
274 .type = "cgroup",
275 .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
276 .mode = MNT_IN_CONTAINER,
277 };
278 char ***k = NULL;
279
280 controller = set_steal_first(controllers);
281 if (!controller)
282 break;
283
284 if (join_controllers)
285 for (k = join_controllers; *k; k++)
286 if (strv_find(*k, controller))
287 break;
288
289 if (k && *k) {
290 char **i, **j;
291
292 for (i = *k, j = *k; *i; i++) {
293
294 if (!streq(*i, controller)) {
295 _cleanup_free_ char *t;
296
297 t = set_remove(controllers, *i);
298 if (!t) {
299 free(*i);
300 continue;
301 }
302 }
303
304 *(j++) = *i;
305 }
306
307 *j = NULL;
308
309 options = strv_join(*k, ",");
310 if (!options)
311 return log_oom();
312 } else {
313 options = controller;
314 controller = NULL;
315 }
316
317 where = strappend("/sys/fs/cgroup/", options);
318 if (!where)
319 return log_oom();
320
321 p.where = where;
322 p.options = options;
323
324 r = mount_one(&p, true);
325 if (r < 0)
326 return r;
327
328 if (r > 0 && k && *k) {
329 char **i;
330
331 for (i = *k; *i; i++) {
332 _cleanup_free_ char *t = NULL;
333
334 t = strappend("/sys/fs/cgroup/", *i);
335 if (!t)
336 return log_oom();
337
338 r = symlink(options, t);
339 if (r < 0 && errno != EEXIST) {
340 log_error("Failed to create symlink %s: %m", t);
341 return -errno;
342 }
343 }
344 }
345 }
346
347 /* Now that we mounted everything, let's make the tmpfs the
348 * cgroup file systems are mounted into read-only. */
349 mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
350
351 return 0;
352 }
353
354 static int nftw_cb(
355 const char *fpath,
356 const struct stat *sb,
357 int tflag,
358 struct FTW *ftwbuf) {
359
360 /* No need to label /dev twice in a row... */
361 if (_unlikely_(ftwbuf->level == 0))
362 return FTW_CONTINUE;
363
364 label_fix(fpath, false, false);
365
366 /* /run/initramfs is static data and big, no need to
367 * dynamically relabel its contents at boot... */
368 if (_unlikely_(ftwbuf->level == 1 &&
369 tflag == FTW_D &&
370 streq(fpath, "/run/initramfs")))
371 return FTW_SKIP_SUBTREE;
372
373 return FTW_CONTINUE;
374 };
375
376 int mount_setup(bool loaded_policy) {
377 int r;
378 unsigned i;
379
380 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
381 r = mount_one(mount_table + i, true);
382
383 if (r < 0)
384 return r;
385 }
386
387 /* Nodes in devtmpfs and /run need to be manually updated for
388 * the appropriate labels, after mounting. The other virtual
389 * API file systems like /sys and /proc do not need that, they
390 * use the same label for all their files. */
391 if (loaded_policy) {
392 usec_t before_relabel, after_relabel;
393 char timespan[FORMAT_TIMESPAN_MAX];
394
395 before_relabel = now(CLOCK_MONOTONIC);
396
397 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
398 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
399
400 after_relabel = now(CLOCK_MONOTONIC);
401
402 log_info("Relabelled /dev and /run in %s.",
403 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
404 }
405
406 /* Create a few default symlinks, which are normally created
407 * by udevd, but some scripts might need them before we start
408 * udevd. */
409 dev_setup(NULL);
410
411 /* Mark the root directory as shared in regards to mount
412 * propagation. The kernel defaults to "private", but we think
413 * it makes more sense to have a default of "shared" so that
414 * nspawn and the container tools work out of the box. If
415 * specific setups need other settings they can reset the
416 * propagation mode to private if needed. */
417 if (detect_container(NULL) <= 0)
418 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
419 log_warning("Failed to set up the root directory for shared mount propagation: %m");
420
421 /* Create a few directories we always want around, Note that
422 * sd_booted() checks for /run/systemd/system, so this mkdir
423 * really needs to stay for good, otherwise software that
424 * copied sd-daemon.c into their sources will misdetect
425 * systemd. */
426 mkdir_label("/run/systemd", 0755);
427 mkdir_label("/run/systemd/system", 0755);
428 mkdir_label("/run/systemd/inaccessible", 0000);
429
430 return 0;
431 }