]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/mount-setup.c
tree-wide usage of %m specifier instead of strerror(errno)
[thirdparty/systemd.git] / src / core / mount-setup.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <errno.h>
24 #include <sys/stat.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <libgen.h>
28 #include <assert.h>
29 #include <unistd.h>
30 #include <ftw.h>
31
32 #include "mount-setup.h"
33 #include "dev-setup.h"
34 #include "log.h"
35 #include "macro.h"
36 #include "util.h"
37 #include "label.h"
38 #include "set.h"
39 #include "strv.h"
40 #include "mkdir.h"
41 #include "path-util.h"
42 #include "missing.h"
43 #include "virt.h"
44 #include "efivars.h"
45 #include "smack-util.h"
46
47 #ifndef TTY_GID
48 #define TTY_GID 5
49 #endif
50
/* Per-entry behaviour flags for a MountPoint; values may be OR-ed together. */
typedef enum MountMode {
        /* No special handling. */
        MNT_NONE = 0,

        /* A failed mount() of this entry is reported as an error to the
         * caller instead of being logged at debug level and ignored. */
        MNT_FATAL = 1 << 0,

        /* The entry is also mounted when running inside a container;
         * entries without this flag are skipped there. */
        MNT_IN_CONTAINER = 1 << 1,
} MountMode;
56
57 typedef struct MountPoint {
58 const char *what;
59 const char *where;
60 const char *type;
61 const char *options;
62 unsigned long flags;
63 bool (*condition_fn)(void);
64 MountMode mode;
65 } MountPoint;
66
/* Number of leading mount_table entries that are mounted early.
 *
 * The first three entries we might need before SELinux is up. The
 * fourth (securityfs) is needed by IMA to load a custom policy. When
 * SMACK support is compiled in, smackfs is the fifth entry of the
 * table and is needed early, too. All remaining entries we can delay
 * until SELinux and IMA are loaded.
 *
 * Without SMACK support the fifth table entry is the generic /dev/shm
 * tmpfs, which is not an early mount, hence the count must depend on
 * HAVE_SMACK. */
#ifdef HAVE_SMACK
#define N_EARLY_MOUNT 5
#else
#define N_EARLY_MOUNT 4
#endif
71
72 static const MountPoint mount_table[] = {
73 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
74 NULL, MNT_FATAL|MNT_IN_CONTAINER },
75 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
76 NULL, MNT_FATAL|MNT_IN_CONTAINER },
77 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
78 NULL, MNT_FATAL|MNT_IN_CONTAINER },
79 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
80 NULL, MNT_NONE },
81 #ifdef HAVE_SMACK
82 { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
83 use_smack, MNT_FATAL },
84 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
85 use_smack, MNT_FATAL },
86 #endif
87 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
88 NULL, MNT_FATAL|MNT_IN_CONTAINER },
89 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
90 NULL, MNT_IN_CONTAINER },
91 #ifdef HAVE_SMACK
92 { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
93 use_smack, MNT_FATAL },
94 #endif
95 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
96 NULL, MNT_FATAL|MNT_IN_CONTAINER },
97 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
98 NULL, MNT_IN_CONTAINER },
99 #ifdef HAVE_XATTR
100 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
101 NULL, MNT_IN_CONTAINER },
102 #endif
103 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
104 NULL, MNT_IN_CONTAINER },
105 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
106 NULL, MNT_NONE },
107 #ifdef ENABLE_EFI
108 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
109 is_efi_boot, MNT_NONE },
110 #endif
111 };
112
/* API file systems that other software might mount. We do not mount
 * them ourselves; they are listed only so that we know to ignore them.
 *
 * The list is a sequence of NUL-terminated strings; the terminator the
 * compiler appends after the last literal yields the empty string that
 * ends the list. */
static const char ignore_paths[] =
        /* SELinux file systems */
        "/sys/fs/selinux\0"
        "/selinux\0"

        /* Legacy cgroup mount points */
        "/dev/cgroup\0"
        "/cgroup\0"

        /* Legacy kernel file system */
        "/proc/bus/usb\0"

        /* Container bind mounts */
        "/proc/sys\0"
        "/dev/console\0"
        "/proc/kmsg\0";
129
130 bool mount_point_is_api(const char *path) {
131 unsigned i;
132
133 /* Checks if this mount point is considered "API", and hence
134 * should be ignored */
135
136 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
137 if (path_equal(path, mount_table[i].where))
138 return true;
139
140 return path_startswith(path, "/sys/fs/cgroup/");
141 }
142
143 bool mount_point_ignore(const char *path) {
144 const char *i;
145
146 NULSTR_FOREACH(i, ignore_paths)
147 if (path_equal(path, i))
148 return true;
149
150 return false;
151 }
152
153 static int mount_one(const MountPoint *p, bool relabel) {
154 int r;
155
156 assert(p);
157
158 if (p->condition_fn && !p->condition_fn())
159 return 0;
160
161 /* Relabel first, just in case */
162 if (relabel)
163 label_fix(p->where, true, true);
164
165 r = path_is_mount_point(p->where, true);
166 if (r < 0)
167 return r;
168
169 if (r > 0)
170 return 0;
171
172 /* Skip securityfs in a container */
173 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
174 return 0;
175
176 /* The access mode here doesn't really matter too much, since
177 * the mounted file system will take precedence anyway. */
178 mkdir_p_label(p->where, 0755);
179
180 log_debug("Mounting %s to %s of type %s with options %s.",
181 p->what,
182 p->where,
183 p->type,
184 strna(p->options));
185
186 if (mount(p->what,
187 p->where,
188 p->type,
189 p->flags,
190 p->options) < 0) {
191 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %m", p->where);
192 return (p->mode & MNT_FATAL) ? -errno : 0;
193 }
194
195 /* Relabel again, since we now mounted something fresh here */
196 if (relabel)
197 label_fix(p->where, false, false);
198
199 return 1;
200 }
201
202 int mount_setup_early(void) {
203 unsigned i;
204 int r = 0;
205
206 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
207
208 /* Do a minimal mount of /proc and friends to enable the most
209 * basic stuff, such as SELinux */
210 for (i = 0; i < N_EARLY_MOUNT; i ++) {
211 int j;
212
213 j = mount_one(mount_table + i, false);
214 if (r == 0)
215 r = j;
216 }
217
218 return r;
219 }
220
221 int mount_cgroup_controllers(char ***join_controllers) {
222 int r;
223 char buf[LINE_MAX];
224 _cleanup_set_free_free_ Set *controllers = NULL;
225 _cleanup_fclose_ FILE *f;
226
227 /* Mount all available cgroup controllers that are built into the kernel. */
228
229 f = fopen("/proc/cgroups", "re");
230 if (!f) {
231 log_error("Failed to enumerate cgroup controllers: %m");
232 return 0;
233 }
234
235 controllers = set_new(string_hash_func, string_compare_func);
236 if (!controllers)
237 return log_oom();
238
239 /* Ignore the header line */
240 (void) fgets(buf, sizeof(buf), f);
241
242 for (;;) {
243 char *controller;
244 int enabled = 0;
245
246 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
247
248 if (feof(f))
249 break;
250
251 log_error("Failed to parse /proc/cgroups.");
252 return -EIO;
253 }
254
255 if (!enabled) {
256 free(controller);
257 continue;
258 }
259
260 r = set_consume(controllers, controller);
261 if (r < 0) {
262 log_error("Failed to add controller to set.");
263 return r;
264 }
265 }
266
267 for (;;) {
268 MountPoint p = {
269 .what = "cgroup",
270 .type = "cgroup",
271 .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
272 .mode = MNT_IN_CONTAINER,
273 };
274 char ***k = NULL;
275 _cleanup_free_ char *options = NULL, *controller;
276
277 controller = set_steal_first(controllers);
278 if (!controller)
279 break;
280
281 if (join_controllers)
282 for (k = join_controllers; *k; k++)
283 if (strv_find(*k, controller))
284 break;
285
286 if (k && *k) {
287 char **i, **j;
288
289 for (i = *k, j = *k; *i; i++) {
290
291 if (!streq(*i, controller)) {
292 char _cleanup_free_ *t;
293
294 t = set_remove(controllers, *i);
295 if (!t) {
296 free(*i);
297 continue;
298 }
299 }
300
301 *(j++) = *i;
302 }
303
304 *j = NULL;
305
306 options = strv_join(*k, ",");
307 if (!options)
308 return log_oom();
309 } else {
310 options = controller;
311 controller = NULL;
312 }
313
314 p.where = strappenda("/sys/fs/cgroup/", options);
315 p.options = options;
316
317 r = mount_one(&p, true);
318 if (r < 0)
319 return r;
320
321 if (r > 0 && k && *k) {
322 char **i;
323
324 for (i = *k; *i; i++) {
325 char *t = strappenda("/sys/fs/cgroup/", *i);
326
327 r = symlink(options, t);
328 if (r < 0 && errno != EEXIST) {
329 log_error("Failed to create symlink %s: %m", t);
330 return -errno;
331 }
332 }
333 }
334 }
335
336 return 0;
337 }
338
339 static int nftw_cb(
340 const char *fpath,
341 const struct stat *sb,
342 int tflag,
343 struct FTW *ftwbuf) {
344
345 /* No need to label /dev twice in a row... */
346 if (_unlikely_(ftwbuf->level == 0))
347 return FTW_CONTINUE;
348
349 label_fix(fpath, false, false);
350
351 /* /run/initramfs is static data and big, no need to
352 * dynamically relabel its contents at boot... */
353 if (_unlikely_(ftwbuf->level == 1 &&
354 tflag == FTW_D &&
355 streq(fpath, "/run/initramfs")))
356 return FTW_SKIP_SUBTREE;
357
358 return FTW_CONTINUE;
359 };
360
361 int mount_setup(bool loaded_policy) {
362 int r;
363 unsigned i;
364
365 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
366 r = mount_one(mount_table + i, true);
367
368 if (r < 0)
369 return r;
370 }
371
372 /* Nodes in devtmpfs and /run need to be manually updated for
373 * the appropriate labels, after mounting. The other virtual
374 * API file systems like /sys and /proc do not need that, they
375 * use the same label for all their files. */
376 if (loaded_policy) {
377 usec_t before_relabel, after_relabel;
378 char timespan[FORMAT_TIMESPAN_MAX];
379
380 before_relabel = now(CLOCK_MONOTONIC);
381
382 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
383 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
384
385 after_relabel = now(CLOCK_MONOTONIC);
386
387 log_info("Relabelled /dev and /run in %s.",
388 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
389 }
390
391 /* Create a few default symlinks, which are normally created
392 * by udevd, but some scripts might need them before we start
393 * udevd. */
394 dev_setup(NULL);
395
396 /* Mark the root directory as shared in regards to mount
397 * propagation. The kernel defaults to "private", but we think
398 * it makes more sense to have a default of "shared" so that
399 * nspawn and the container tools work out of the box. If
400 * specific setups need other settings they can reset the
401 * propagation mode to private if needed. */
402 if (detect_container(NULL) <= 0)
403 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
404 log_warning("Failed to set up the root directory for shared mount propagation: %m");
405
406 /* Create a few directories we always want around, Note that
407 * sd_booted() checks for /run/systemd/system, so this mkdir
408 * really needs to stay for good, otherwise software that
409 * copied sd-daemon.c into their sources will misdetect
410 * systemd. */
411 mkdir_label("/run/systemd", 0755);
412 mkdir_label("/run/systemd/system", 0755);
413 mkdir_label("/run/systemd/inaccessible", 0000);
414
415 return 0;
416 }