]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/mount-setup.c
Merge nss-myhostname
[thirdparty/systemd.git] / src / core / mount-setup.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <errno.h>
24 #include <sys/stat.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <libgen.h>
28 #include <assert.h>
29 #include <unistd.h>
30 #include <ftw.h>
31
32 #include "mount-setup.h"
33 #include "dev-setup.h"
34 #include "log.h"
35 #include "macro.h"
36 #include "util.h"
37 #include "label.h"
38 #include "set.h"
39 #include "strv.h"
40 #include "mkdir.h"
41 #include "path-util.h"
42 #include "missing.h"
43 #include "virt.h"
44
/* Fallback group ID used for the "gid=" option of the devpts mount
 * below, in case nothing defined TTY_GID before this point. */
#if !defined(TTY_GID)
#  define TTY_GID 5
#endif
48
/* Behaviour flags attached to a MountPoint entry; may be OR-ed together. */
typedef enum MountMode {
        /* No special handling */
        MNT_NONE         = 0x0,

        /* A failure to mount this entry is logged as an error and
         * reported to the caller instead of being ignored */
        MNT_FATAL        = 0x1,

        /* The entry is mounted even when a container is detected */
        MNT_IN_CONTAINER = 0x2,
} MountMode;
54
55 typedef struct MountPoint {
56 const char *what;
57 const char *where;
58 const char *type;
59 const char *options;
60 unsigned long flags;
61 bool (*condition_fn)(void);
62 MountMode mode;
63 } MountPoint;
64
65 /* The first three entries we might need before SELinux is up. The
66 * fourth (securityfs) is needed by IMA to load a custom policy. The
67 * other ones we can delay until SELinux and IMA are loaded. */
68 #define N_EARLY_MOUNT 4
69
70 static const MountPoint mount_table[] = {
71 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
72 NULL, MNT_FATAL|MNT_IN_CONTAINER },
73 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
74 NULL, MNT_FATAL|MNT_IN_CONTAINER },
75 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
76 NULL, MNT_FATAL|MNT_IN_CONTAINER },
77 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
78 NULL, MNT_NONE },
79 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
80 is_efiboot, MNT_NONE },
81 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
82 NULL, MNT_FATAL|MNT_IN_CONTAINER },
83 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
84 NULL, MNT_IN_CONTAINER },
85 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
86 NULL, MNT_FATAL|MNT_IN_CONTAINER },
87 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
88 NULL, MNT_IN_CONTAINER },
89 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
90 NULL, MNT_IN_CONTAINER },
91 };
92
/* API file systems that other software might mount. They are only
 * listed here so that we know they should be ignored. The list is a
 * sequence of NUL-separated paths, terminated by an empty string. */

static const char ignore_paths[] =
        "/sys/fs/selinux\0"     /* SELinux file system */
        "/selinux\0"            /* SELinux file system */
        "/dev/cgroup\0"         /* legacy cgroup mount point */
        "/cgroup\0"             /* legacy cgroup mount point */
        "/proc/bus/usb\0"       /* legacy kernel file system */
        "/proc/sys\0"           /* container bind mount */
        "/dev/console\0"        /* container bind mount */
        "/proc/kmsg\0";         /* container bind mount */
109
110 bool mount_point_is_api(const char *path) {
111 unsigned i;
112
113 /* Checks if this mount point is considered "API", and hence
114 * should be ignored */
115
116 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
117 if (path_equal(path, mount_table[i].where))
118 return true;
119
120 return path_startswith(path, "/sys/fs/cgroup/");
121 }
122
123 bool mount_point_ignore(const char *path) {
124 const char *i;
125
126 NULSTR_FOREACH(i, ignore_paths)
127 if (path_equal(path, i))
128 return true;
129
130 return false;
131 }
132
133 static int mount_one(const MountPoint *p, bool relabel) {
134 int r;
135
136 assert(p);
137
138 if (p->condition_fn && !p->condition_fn())
139 return 0;
140
141 /* Relabel first, just in case */
142 if (relabel)
143 label_fix(p->where, true, true);
144
145 r = path_is_mount_point(p->where, true);
146 if (r < 0)
147 return r;
148
149 if (r > 0)
150 return 0;
151
152 /* Skip securityfs in a container */
153 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
154 return 0;
155
156 /* The access mode here doesn't really matter too much, since
157 * the mounted file system will take precedence anyway. */
158 mkdir_p_label(p->where, 0755);
159
160 log_debug("Mounting %s to %s of type %s with options %s.",
161 p->what,
162 p->where,
163 p->type,
164 strna(p->options));
165
166 if (mount(p->what,
167 p->where,
168 p->type,
169 p->flags,
170 p->options) < 0) {
171 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %s", p->where, strerror(errno));
172 return (p->mode & MNT_FATAL) ? -errno : 0;
173 }
174
175 /* Relabel again, since we now mounted something fresh here */
176 if (relabel)
177 label_fix(p->where, false, false);
178
179 return 1;
180 }
181
182 int mount_setup_early(void) {
183 unsigned i;
184 int r = 0;
185
186 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
187
188 /* Do a minimal mount of /proc and friends to enable the most
189 * basic stuff, such as SELinux */
190 for (i = 0; i < N_EARLY_MOUNT; i ++) {
191 int j;
192
193 j = mount_one(mount_table + i, false);
194 if (r == 0)
195 r = j;
196 }
197
198 return r;
199 }
200
201 int mount_cgroup_controllers(char ***join_controllers) {
202 int r;
203 FILE *f;
204 char buf[LINE_MAX];
205 Set *controllers;
206
207 /* Mount all available cgroup controllers that are built into the kernel. */
208
209 f = fopen("/proc/cgroups", "re");
210 if (!f) {
211 log_error("Failed to enumerate cgroup controllers: %m");
212 return 0;
213 }
214
215 controllers = set_new(string_hash_func, string_compare_func);
216 if (!controllers) {
217 r = log_oom();
218 goto finish;
219 }
220
221 /* Ignore the header line */
222 (void) fgets(buf, sizeof(buf), f);
223
224 for (;;) {
225 char *controller;
226 int enabled = 0;
227
228 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
229
230 if (feof(f))
231 break;
232
233 log_error("Failed to parse /proc/cgroups.");
234 r = -EIO;
235 goto finish;
236 }
237
238 if (!enabled) {
239 free(controller);
240 continue;
241 }
242
243 r = set_put(controllers, controller);
244 if (r < 0) {
245 log_error("Failed to add controller to set.");
246 free(controller);
247 goto finish;
248 }
249 }
250
251 for (;;) {
252 MountPoint p;
253 char *controller, *where, *options;
254 char ***k = NULL;
255
256 controller = set_steal_first(controllers);
257 if (!controller)
258 break;
259
260 if (join_controllers)
261 for (k = join_controllers; *k; k++)
262 if (strv_find(*k, controller))
263 break;
264
265 if (k && *k) {
266 char **i, **j;
267
268 for (i = *k, j = *k; *i; i++) {
269
270 if (!streq(*i, controller)) {
271 char *t;
272
273 t = set_remove(controllers, *i);
274 if (!t) {
275 free(*i);
276 continue;
277 }
278 free(t);
279 }
280
281 *(j++) = *i;
282 }
283
284 *j = NULL;
285
286 options = strv_join(*k, ",");
287 if (!options) {
288 free(controller);
289 r = log_oom();
290 goto finish;
291 }
292
293 } else {
294 options = controller;
295 controller = NULL;
296 }
297
298 where = strappend("/sys/fs/cgroup/", options);
299 if (!where) {
300 free(options);
301 r = log_oom();
302 goto finish;
303 }
304
305 zero(p);
306 p.what = "cgroup";
307 p.where = where;
308 p.type = "cgroup";
309 p.options = options;
310 p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
311
312 r = mount_one(&p, true);
313 free(controller);
314 free(where);
315
316 if (r < 0) {
317 free(options);
318 goto finish;
319 }
320
321 if (r > 0 && k && *k) {
322 char **i;
323
324 for (i = *k; *i; i++) {
325 char *t;
326
327 t = strappend("/sys/fs/cgroup/", *i);
328 if (!t) {
329 r = log_oom();
330 free(options);
331 goto finish;
332 }
333
334 r = symlink(options, t);
335 free(t);
336
337 if (r < 0 && errno != EEXIST) {
338 log_error("Failed to create symlink: %m");
339 r = -errno;
340 free(options);
341 goto finish;
342 }
343 }
344 }
345
346 free(options);
347 }
348
349 r = 0;
350
351 finish:
352 set_free_free(controllers);
353
354 fclose(f);
355
356 return r;
357 }
358
359 static int nftw_cb(
360 const char *fpath,
361 const struct stat *sb,
362 int tflag,
363 struct FTW *ftwbuf) {
364
365 /* No need to label /dev twice in a row... */
366 if (_unlikely_(ftwbuf->level == 0))
367 return FTW_CONTINUE;
368
369 label_fix(fpath, false, false);
370
371 /* /run/initramfs is static data and big, no need to
372 * dynamically relabel its contents at boot... */
373 if (_unlikely_(ftwbuf->level == 1 &&
374 tflag == FTW_D &&
375 streq(fpath, "/run/initramfs")))
376 return FTW_SKIP_SUBTREE;
377
378 return FTW_CONTINUE;
379 };
380
381 int mount_setup(bool loaded_policy) {
382
383 static const char relabel[] =
384 "/run/initramfs/root-fsck\0"
385 "/run/initramfs/shutdown\0";
386
387 int r;
388 unsigned i;
389 const char *j;
390
391 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
392 r = mount_one(mount_table + i, true);
393
394 if (r < 0)
395 return r;
396 }
397
398 /* Nodes in devtmpfs and /run need to be manually updated for
399 * the appropriate labels, after mounting. The other virtual
400 * API file systems like /sys and /proc do not need that, they
401 * use the same label for all their files. */
402 if (loaded_policy) {
403 usec_t before_relabel, after_relabel;
404 char timespan[FORMAT_TIMESPAN_MAX];
405
406 before_relabel = now(CLOCK_MONOTONIC);
407
408 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
409 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
410
411 /* Explicitly relabel these */
412 NULSTR_FOREACH(j, relabel)
413 label_fix(j, true, false);
414
415 after_relabel = now(CLOCK_MONOTONIC);
416
417 log_info("Relabelled /dev and /run in %s.",
418 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel));
419 }
420
421 /* Create a few default symlinks, which are normally created
422 * by udevd, but some scripts might need them before we start
423 * udevd. */
424 dev_setup(NULL);
425
426 /* Mark the root directory as shared in regards to mount
427 * propagation. The kernel defaults to "private", but we think
428 * it makes more sense to have a default of "shared" so that
429 * nspawn and the container tools work out of the box. If
430 * specific setups need other settings they can reset the
431 * propagation mode to private if needed. */
432 if (detect_container(NULL) <= 0)
433 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
434 log_warning("Failed to set up the root directory for shared mount propagation: %m");
435
436 /* Create a few directories we always want around */
437 mkdir_label("/run/systemd", 0755);
438 mkdir_label("/run/systemd/system", 0755);
439
440 return 0;
441 }