]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/mount-setup.c
9894c7fddffc8b083fea4a9e24ee973a6682014e
[thirdparty/systemd.git] / src / core / mount-setup.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <errno.h>
24 #include <sys/stat.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <libgen.h>
28 #include <assert.h>
29 #include <unistd.h>
30 #include <ftw.h>
31
32 #include "mount-setup.h"
33 #include "dev-setup.h"
34 #include "log.h"
35 #include "macro.h"
36 #include "util.h"
37 #include "label.h"
38 #include "set.h"
39 #include "strv.h"
40 #include "mkdir.h"
41 #include "path-util.h"
42 #include "missing.h"
43 #include "virt.h"
44
45 #ifndef TTY_GID
46 #define TTY_GID 5
47 #endif
48
/* Describes a single file system to be mounted by mount_one(). The
 * members are filled in positionally by the mount table, hence their
 * order must not be changed. */
typedef struct MountPoint {
        const char *what;       /* source argument handed to mount() */
        const char *where;      /* directory the file system is mounted on */
        const char *type;       /* file system type handed to mount() */
        const char *options;    /* data/options string for mount(), may be NULL */
        unsigned long flags;    /* MS_* flags handed to mount() */
        bool fatal;             /* if true, a failing mount() is logged as error and propagated */
        bool in_container;      /* if false, the mount is skipped when a container is detected */
} MountPoint;
58
59 /* The first three entries we might need before SELinux is up. The
60 * fourth (securityfs) is needed by IMA to load a custom policy. The
61 * other ones we can delay until SELinux and IMA are loaded. */
62 #define N_EARLY_MOUNT 4
63
64 static const MountPoint mount_table[] = {
65 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true },
66 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true },
67 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, true },
68 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, false, false },
69 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, false, false },
70 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, true },
71 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, false, true },
72 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, true },
73 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, false, true },
74 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, false, true },
75 };
76
/* API file systems that might be mounted by other software. They are
 * only listed here so that we know that we should ignore them. The
 * list is a sequence of NUL-separated strings, ended by an empty
 * string. */

static const char ignore_paths[] =
        /* SELinux file systems */
        "/sys/fs/selinux\0"
        "/selinux\0"
        /* Legacy cgroup mount points */
        "/dev/cgroup\0"
        "/cgroup\0"
        /* Legacy kernel file system */
        "/proc/bus/usb\0"
        /* Container bind mounts */
        "/proc/sys\0"
        "/dev/console\0"
        "/proc/kmsg\0";
93
94 bool mount_point_is_api(const char *path) {
95 unsigned i;
96
97 /* Checks if this mount point is considered "API", and hence
98 * should be ignored */
99
100 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
101 if (path_equal(path, mount_table[i].where))
102 return true;
103
104 return path_startswith(path, "/sys/fs/cgroup/");
105 }
106
107 bool mount_point_ignore(const char *path) {
108 const char *i;
109
110 NULSTR_FOREACH(i, ignore_paths)
111 if (path_equal(path, i))
112 return true;
113
114 return false;
115 }
116
117 static int mount_one(const MountPoint *p, bool relabel) {
118 int r;
119
120 assert(p);
121
122 /* Relabel first, just in case */
123 if (relabel)
124 label_fix(p->where, true, true);
125
126 r = path_is_mount_point(p->where, true);
127 if (r < 0)
128 return r;
129
130 if (r > 0)
131 return 0;
132
133 /* Skip securityfs in a container */
134 if (!p->in_container && detect_container(NULL) > 0)
135 return 0;
136
137 /* The access mode here doesn't really matter too much, since
138 * the mounted file system will take precedence anyway. */
139 mkdir_p_label(p->where, 0755);
140
141 log_debug("Mounting %s to %s of type %s with options %s.",
142 p->what,
143 p->where,
144 p->type,
145 strna(p->options));
146
147 if (mount(p->what,
148 p->where,
149 p->type,
150 p->flags,
151 p->options) < 0) {
152 log_full(p->fatal ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %s", p->where, strerror(errno));
153 return p->fatal ? -errno : 0;
154 }
155
156 /* Relabel again, since we now mounted something fresh here */
157 if (relabel)
158 label_fix(p->where, false, false);
159
160 return 1;
161 }
162
163 int mount_setup_early(void) {
164 unsigned i;
165 int r = 0;
166
167 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
168
169 /* Do a minimal mount of /proc and friends to enable the most
170 * basic stuff, such as SELinux */
171 for (i = 0; i < N_EARLY_MOUNT; i ++) {
172 int j;
173
174 j = mount_one(mount_table + i, false);
175 if (r == 0)
176 r = j;
177 }
178
179 return r;
180 }
181
182 int mount_cgroup_controllers(char ***join_controllers) {
183 int r;
184 FILE *f;
185 char buf[LINE_MAX];
186 Set *controllers;
187
188 /* Mount all available cgroup controllers that are built into the kernel. */
189
190 f = fopen("/proc/cgroups", "re");
191 if (!f) {
192 log_error("Failed to enumerate cgroup controllers: %m");
193 return 0;
194 }
195
196 controllers = set_new(string_hash_func, string_compare_func);
197 if (!controllers) {
198 r = log_oom();
199 goto finish;
200 }
201
202 /* Ignore the header line */
203 (void) fgets(buf, sizeof(buf), f);
204
205 for (;;) {
206 char *controller;
207 int enabled = 0;
208
209 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
210
211 if (feof(f))
212 break;
213
214 log_error("Failed to parse /proc/cgroups.");
215 r = -EIO;
216 goto finish;
217 }
218
219 if (!enabled) {
220 free(controller);
221 continue;
222 }
223
224 r = set_put(controllers, controller);
225 if (r < 0) {
226 log_error("Failed to add controller to set.");
227 free(controller);
228 goto finish;
229 }
230 }
231
232 for (;;) {
233 MountPoint p;
234 char *controller, *where, *options;
235 char ***k = NULL;
236
237 controller = set_steal_first(controllers);
238 if (!controller)
239 break;
240
241 if (join_controllers)
242 for (k = join_controllers; *k; k++)
243 if (strv_find(*k, controller))
244 break;
245
246 if (k && *k) {
247 char **i, **j;
248
249 for (i = *k, j = *k; *i; i++) {
250
251 if (!streq(*i, controller)) {
252 char *t;
253
254 t = set_remove(controllers, *i);
255 if (!t) {
256 free(*i);
257 continue;
258 }
259 free(t);
260 }
261
262 *(j++) = *i;
263 }
264
265 *j = NULL;
266
267 options = strv_join(*k, ",");
268 if (!options) {
269 free(controller);
270 r = log_oom();
271 goto finish;
272 }
273
274 } else {
275 options = controller;
276 controller = NULL;
277 }
278
279 where = strappend("/sys/fs/cgroup/", options);
280 if (!where) {
281 free(options);
282 r = log_oom();
283 goto finish;
284 }
285
286 zero(p);
287 p.what = "cgroup";
288 p.where = where;
289 p.type = "cgroup";
290 p.options = options;
291 p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
292 p.fatal = false;
293
294 r = mount_one(&p, true);
295 free(controller);
296 free(where);
297
298 if (r < 0) {
299 free(options);
300 goto finish;
301 }
302
303 if (r > 0 && k && *k) {
304 char **i;
305
306 for (i = *k; *i; i++) {
307 char *t;
308
309 t = strappend("/sys/fs/cgroup/", *i);
310 if (!t) {
311 r = log_oom();
312 free(options);
313 goto finish;
314 }
315
316 r = symlink(options, t);
317 free(t);
318
319 if (r < 0 && errno != EEXIST) {
320 log_error("Failed to create symlink: %m");
321 r = -errno;
322 free(options);
323 goto finish;
324 }
325 }
326 }
327
328 free(options);
329 }
330
331 r = 0;
332
333 finish:
334 set_free_free(controllers);
335
336 fclose(f);
337
338 return r;
339 }
340
341 static int nftw_cb(
342 const char *fpath,
343 const struct stat *sb,
344 int tflag,
345 struct FTW *ftwbuf) {
346
347 /* No need to label /dev twice in a row... */
348 if (_unlikely_(ftwbuf->level == 0))
349 return FTW_CONTINUE;
350
351 label_fix(fpath, false, false);
352
353 /* /run/initramfs is static data and big, no need to
354 * dynamically relabel its contents at boot... */
355 if (_unlikely_(ftwbuf->level == 1 &&
356 tflag == FTW_D &&
357 streq(fpath, "/run/initramfs")))
358 return FTW_SKIP_SUBTREE;
359
360 return FTW_CONTINUE;
361 };
362
363 int mount_setup(bool loaded_policy) {
364
365 static const char relabel[] =
366 "/run/initramfs/root-fsck\0"
367 "/run/initramfs/shutdown\0";
368
369 int r;
370 unsigned i;
371 const char *j;
372
373 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
374 r = mount_one(mount_table + i, true);
375
376 if (r < 0)
377 return r;
378 }
379
380 /* Nodes in devtmpfs and /run need to be manually updated for
381 * the appropriate labels, after mounting. The other virtual
382 * API file systems like /sys and /proc do not need that, they
383 * use the same label for all their files. */
384 if (loaded_policy) {
385 usec_t before_relabel, after_relabel;
386 char timespan[FORMAT_TIMESPAN_MAX];
387
388 before_relabel = now(CLOCK_MONOTONIC);
389
390 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
391 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
392
393 /* Explicitly relabel these */
394 NULSTR_FOREACH(j, relabel)
395 label_fix(j, true, false);
396
397 after_relabel = now(CLOCK_MONOTONIC);
398
399 log_info("Relabelled /dev and /run in %s.",
400 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel));
401 }
402
403 /* Create a few default symlinks, which are normally created
404 * by udevd, but some scripts might need them before we start
405 * udevd. */
406 dev_setup(NULL);
407
408 /* Mark the root directory as shared in regards to mount
409 * propagation. The kernel defaults to "private", but we think
410 * it makes more sense to have a default of "shared" so that
411 * nspawn and the container tools work out of the box. If
412 * specific setups need other settings they can reset the
413 * propagation mode to private if needed. */
414 if (detect_container(NULL) <= 0)
415 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
416 log_warning("Failed to set up the root directory for shared mount propagation: %m");
417
418 /* Create a few directories we always want around */
419 mkdir_label("/run/systemd", 0755);
420 mkdir_label("/run/systemd/system", 0755);
421
422 return 0;
423 }