]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/mount-setup.c
mac: also rename use_{smack,selinux,apparmor}() calls so that they share the new...
[thirdparty/systemd.git] / src / core / mount-setup.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <errno.h>
24 #include <sys/stat.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <libgen.h>
28 #include <assert.h>
29 #include <unistd.h>
30 #include <ftw.h>
31
32 #include "mount-setup.h"
33 #include "dev-setup.h"
34 #include "log.h"
35 #include "macro.h"
36 #include "util.h"
37 #include "label.h"
38 #include "set.h"
39 #include "strv.h"
40 #include "mkdir.h"
41 #include "path-util.h"
42 #include "missing.h"
43 #include "virt.h"
44 #include "efivars.h"
45 #include "smack-util.h"
46 #include "def.h"
47
/* Policy bits attached to every MountPoint entry. */
typedef enum MountMode {
        MNT_NONE         = 0x0, /* no special treatment */
        MNT_FATAL        = 0x1, /* a failing mount is logged as an error and reported to the caller */
        MNT_IN_CONTAINER = 0x2, /* also mount this entry when running inside a container */
} MountMode;
53
54 typedef struct MountPoint {
55 const char *what;
56 const char *where;
57 const char *type;
58 const char *options;
59 unsigned long flags;
60 bool (*condition_fn)(void);
61 MountMode mode;
62 } MountPoint;
63
/* Number of leading mount_table entries that are mounted early: the
 * first three may be required before SELinux is up, and securityfs
 * (the fourth) is what IMA needs in order to load a custom policy.
 * With SMACK support compiled in, smackfs is required early as well
 * and becomes the fifth. Everything after that can wait until SELinux
 * and IMA are loaded. */
#ifndef HAVE_SMACK
#define N_EARLY_MOUNT 4
#else
#define N_EARLY_MOUNT 5
#endif
73
74 static const MountPoint mount_table[] = {
75 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
76 NULL, MNT_FATAL|MNT_IN_CONTAINER },
77 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
78 NULL, MNT_FATAL|MNT_IN_CONTAINER },
79 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
80 NULL, MNT_FATAL|MNT_IN_CONTAINER },
81 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
82 NULL, MNT_NONE },
83 #ifdef HAVE_SMACK
84 { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
85 mac_smack_use, MNT_FATAL },
86 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
87 mac_smack_use, MNT_FATAL },
88 #endif
89 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
90 NULL, MNT_FATAL|MNT_IN_CONTAINER },
91 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
92 NULL, MNT_IN_CONTAINER },
93 #ifdef HAVE_SMACK
94 { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
95 mac_smack_use, MNT_FATAL },
96 #endif
97 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
98 NULL, MNT_FATAL|MNT_IN_CONTAINER },
99 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
100 NULL, MNT_FATAL|MNT_IN_CONTAINER },
101 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
102 NULL, MNT_IN_CONTAINER },
103 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
104 NULL, MNT_FATAL|MNT_IN_CONTAINER },
105 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
106 NULL, MNT_NONE },
107 #ifdef ENABLE_EFI
108 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
109 is_efi_boot, MNT_NONE },
110 #endif
111 };
112
/* API file systems that other software may mount. They are listed
 * here only so that we know to ignore them. The list is a sequence of
 * NUL-terminated paths, ended by an empty string. */

static const char ignore_paths[] =
        "/sys/fs/selinux\0"     /* SELinux file system */
        "/selinux\0"            /* SELinux file system, alternative location */
        "/dev/cgroup\0"         /* legacy cgroup mount point */
        "/cgroup\0"             /* legacy cgroup mount point */
        "/proc/bus/usb\0"       /* legacy kernel file system */
        "/proc/sys\0"           /* container bind mount */
        "/dev/console\0"        /* container bind mount */
        "/proc/kmsg\0";         /* container bind mount */
129
130 bool mount_point_is_api(const char *path) {
131 unsigned i;
132
133 /* Checks if this mount point is considered "API", and hence
134 * should be ignored */
135
136 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
137 if (path_equal(path, mount_table[i].where))
138 return true;
139
140 return path_startswith(path, "/sys/fs/cgroup/");
141 }
142
143 bool mount_point_ignore(const char *path) {
144 const char *i;
145
146 NULSTR_FOREACH(i, ignore_paths)
147 if (path_equal(path, i))
148 return true;
149
150 return false;
151 }
152
153 static int mount_one(const MountPoint *p, bool relabel) {
154 int r;
155
156 assert(p);
157
158 if (p->condition_fn && !p->condition_fn())
159 return 0;
160
161 /* Relabel first, just in case */
162 if (relabel)
163 label_fix(p->where, true, true);
164
165 r = path_is_mount_point(p->where, true);
166 if (r < 0)
167 return r;
168
169 if (r > 0)
170 return 0;
171
172 /* Skip securityfs in a container */
173 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
174 return 0;
175
176 /* The access mode here doesn't really matter too much, since
177 * the mounted file system will take precedence anyway. */
178 if (relabel)
179 mkdir_p_label(p->where, 0755);
180 else
181 mkdir_p(p->where, 0755);
182
183 log_debug("Mounting %s to %s of type %s with options %s.",
184 p->what,
185 p->where,
186 p->type,
187 strna(p->options));
188
189 if (mount(p->what,
190 p->where,
191 p->type,
192 p->flags,
193 p->options) < 0) {
194 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s at %s: %m", p->type, p->where);
195 return (p->mode & MNT_FATAL) ? -errno : 0;
196 }
197
198 /* Relabel again, since we now mounted something fresh here */
199 if (relabel)
200 label_fix(p->where, false, false);
201
202 return 1;
203 }
204
205 int mount_setup_early(void) {
206 unsigned i;
207 int r = 0;
208
209 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
210
211 /* Do a minimal mount of /proc and friends to enable the most
212 * basic stuff, such as SELinux */
213 for (i = 0; i < N_EARLY_MOUNT; i ++) {
214 int j;
215
216 j = mount_one(mount_table + i, false);
217 if (r == 0)
218 r = j;
219 }
220
221 return r;
222 }
223
224 int mount_cgroup_controllers(char ***join_controllers) {
225 _cleanup_set_free_free_ Set *controllers = NULL;
226 _cleanup_fclose_ FILE *f;
227 char buf[LINE_MAX];
228 int r;
229
230 /* Mount all available cgroup controllers that are built into the kernel. */
231
232 f = fopen("/proc/cgroups", "re");
233 if (!f) {
234 log_error("Failed to enumerate cgroup controllers: %m");
235 return 0;
236 }
237
238 controllers = set_new(&string_hash_ops);
239 if (!controllers)
240 return log_oom();
241
242 /* Ignore the header line */
243 (void) fgets(buf, sizeof(buf), f);
244
245 for (;;) {
246 char *controller;
247 int enabled = 0;
248
249 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
250
251 if (feof(f))
252 break;
253
254 log_error("Failed to parse /proc/cgroups.");
255 return -EIO;
256 }
257
258 if (!enabled) {
259 free(controller);
260 continue;
261 }
262
263 r = set_consume(controllers, controller);
264 if (r < 0) {
265 log_error("Failed to add controller to set.");
266 return r;
267 }
268 }
269
270 for (;;) {
271 _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
272 MountPoint p = {
273 .what = "cgroup",
274 .type = "cgroup",
275 .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
276 .mode = MNT_IN_CONTAINER,
277 };
278 char ***k = NULL;
279
280 controller = set_steal_first(controllers);
281 if (!controller)
282 break;
283
284 if (join_controllers)
285 for (k = join_controllers; *k; k++)
286 if (strv_find(*k, controller))
287 break;
288
289 if (k && *k) {
290 char **i, **j;
291
292 for (i = *k, j = *k; *i; i++) {
293
294 if (!streq(*i, controller)) {
295 _cleanup_free_ char *t;
296
297 t = set_remove(controllers, *i);
298 if (!t) {
299 free(*i);
300 continue;
301 }
302 }
303
304 *(j++) = *i;
305 }
306
307 *j = NULL;
308
309 options = strv_join(*k, ",");
310 if (!options)
311 return log_oom();
312 } else {
313 options = controller;
314 controller = NULL;
315 }
316
317 where = strappend("/sys/fs/cgroup/", options);
318 if (!where)
319 return log_oom();
320
321 p.where = where;
322 p.options = options;
323
324 r = mount_one(&p, true);
325 if (r < 0)
326 return r;
327
328 if (r > 0 && k && *k) {
329 char **i;
330
331 for (i = *k; *i; i++) {
332 _cleanup_free_ char *t = NULL;
333
334 t = strappend("/sys/fs/cgroup/", *i);
335 if (!t)
336 return log_oom();
337
338 r = symlink(options, t);
339 if (r < 0 && errno != EEXIST) {
340 log_error("Failed to create symlink %s: %m", t);
341 return -errno;
342 }
343 }
344 }
345 }
346
347 /* Now that we mounted everything, let's make the tmpfs the
348 * cgroup file systems are mounted into read-only. */
349 mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
350
351 return 0;
352 }
353
#if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
/* nftw() callback that fixes the label of every visited node below
 * the root of the walk. Returns FTW_CONTINUE, or FTW_SKIP_SUBTREE for
 * the directory /run/initramfs. */
static int nftw_cb(
                const char *fpath,
                const struct stat *sb,
                int tflag,
                struct FTW *ftwbuf) {

        /* No need to label /dev twice in a row... */
        if (_unlikely_(ftwbuf->level == 0))
                return FTW_CONTINUE;

        label_fix(fpath, false, false);

        /* /run/initramfs is static data and big, no need to
         * dynamically relabel its contents at boot... */
        if (_unlikely_(ftwbuf->level == 1 &&
                       tflag == FTW_D &&
                       streq(fpath, "/run/initramfs")))
                return FTW_SKIP_SUBTREE;

        return FTW_CONTINUE;
}
#endif
377
378 int mount_setup(bool loaded_policy) {
379 int r;
380 unsigned i;
381
382 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
383 r = mount_one(mount_table + i, true);
384
385 if (r < 0)
386 return r;
387 }
388
389 #if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
390 /* Nodes in devtmpfs and /run need to be manually updated for
391 * the appropriate labels, after mounting. The other virtual
392 * API file systems like /sys and /proc do not need that, they
393 * use the same label for all their files. */
394 if (loaded_policy) {
395 usec_t before_relabel, after_relabel;
396 char timespan[FORMAT_TIMESPAN_MAX];
397
398 before_relabel = now(CLOCK_MONOTONIC);
399
400 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
401 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
402
403 after_relabel = now(CLOCK_MONOTONIC);
404
405 log_info("Relabelled /dev and /run in %s.",
406 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
407 }
408 #endif
409
410 /* Create a few default symlinks, which are normally created
411 * by udevd, but some scripts might need them before we start
412 * udevd. */
413 dev_setup(NULL);
414
415 /* Mark the root directory as shared in regards to mount
416 * propagation. The kernel defaults to "private", but we think
417 * it makes more sense to have a default of "shared" so that
418 * nspawn and the container tools work out of the box. If
419 * specific setups need other settings they can reset the
420 * propagation mode to private if needed. */
421 if (detect_container(NULL) <= 0)
422 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
423 log_warning("Failed to set up the root directory for shared mount propagation: %m");
424
425 /* Create a few directories we always want around, Note that
426 * sd_booted() checks for /run/systemd/system, so this mkdir
427 * really needs to stay for good, otherwise software that
428 * copied sd-daemon.c into their sources will misdetect
429 * systemd. */
430 mkdir_label("/run/systemd", 0755);
431 mkdir_label("/run/systemd/system", 0755);
432 mkdir_label("/run/systemd/inaccessible", 0000);
433
434 return 0;
435 }