]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/mount-setup.c
mount-setup: ignore common container bind mounts
[thirdparty/systemd.git] / src / core / mount-setup.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <errno.h>
24 #include <sys/stat.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <libgen.h>
28 #include <assert.h>
29 #include <unistd.h>
30 #include <ftw.h>
31
32 #include "mount-setup.h"
33 #include "dev-setup.h"
34 #include "log.h"
35 #include "macro.h"
36 #include "util.h"
37 #include "label.h"
38 #include "set.h"
39 #include "strv.h"
40 #include "mkdir.h"
41
42 #ifndef TTY_GID
43 #define TTY_GID 5
44 #endif
45
/* Describes a single file system to mount. mount_one() hands the
 * string and flag members straight to mount(2). */
typedef struct MountPoint {
        const char *what;       /* source argument for mount() */
        const char *where;      /* directory the file system is mounted on */
        const char *type;       /* file system type name */
        const char *options;    /* mount data string; may be NULL */
        unsigned long flags;    /* MS_* mount flags */
        bool fatal;             /* if true, mount_one() logs a mount failure at
                                 * LOG_ERR and returns it as an error; if false
                                 * the failure is logged at LOG_DEBUG and ignored */
} MountPoint;
54
55 /* The first three entries we might need before SELinux is up. The
56 * fourth (securityfs) is needed by IMA to load a custom policy. The
57 * other ones we can delay until SELinux and IMA are loaded. */
58 #define N_EARLY_MOUNT 4
59
60 static const MountPoint mount_table[] = {
61 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
62 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
63 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true },
64 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, false },
65 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
66 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, false },
67 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
68 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, false },
69 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, false },
70 };
71
/* API file systems and bind mounts that other software may set up.
 * We do not mount them ourselves; they are listed only so that
 * mount_point_ignore() can recognize them.
 *
 * The list is stored as consecutive NUL-terminated strings; the
 * array's own implicit terminator marks the end of the list. */

static const char ignore_paths[] =
        /* Where SELinux exposes its file system (current and old location) */
        "/sys/fs/selinux\0"
        "/selinux\0"

        /* Old-style cgroup mount points */
        "/dev/cgroup\0"
        "/cgroup\0"

        /* Old-style kernel file system */
        "/proc/bus/usb\0"

        /* Bind mounts commonly found inside containers */
        "/proc/sys\0"
        "/dev/console\0"
        "/proc/kmsg\0"
        "/etc/localtime\0"
        "/etc/timezone\0"
        "/etc/machine-id\0";
91
92 bool mount_point_is_api(const char *path) {
93 unsigned i;
94
95 /* Checks if this mount point is considered "API", and hence
96 * should be ignored */
97
98 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
99 if (path_equal(path, mount_table[i].where))
100 return true;
101
102 return path_startswith(path, "/sys/fs/cgroup/");
103 }
104
105 bool mount_point_ignore(const char *path) {
106 const char *i;
107
108 NULSTR_FOREACH(i, ignore_paths)
109 if (path_equal(path, i))
110 return true;
111
112 return false;
113 }
114
115 static int mount_one(const MountPoint *p, bool relabel) {
116 int r;
117
118 assert(p);
119
120 /* Relabel first, just in case */
121 if (relabel)
122 label_fix(p->where, true);
123
124 if ((r = path_is_mount_point(p->where, true)) < 0)
125 return r;
126
127 if (r > 0)
128 return 0;
129
130 /* The access mode here doesn't really matter too much, since
131 * the mounted file system will take precedence anyway. */
132 mkdir_p(p->where, 0755);
133
134 log_debug("Mounting %s to %s of type %s with options %s.",
135 p->what,
136 p->where,
137 p->type,
138 strna(p->options));
139
140 if (mount(p->what,
141 p->where,
142 p->type,
143 p->flags,
144 p->options) < 0) {
145 log_full(p->fatal ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %s", p->where, strerror(errno));
146 return p->fatal ? -errno : 0;
147 }
148
149 /* Relabel again, since we now mounted something fresh here */
150 if (relabel)
151 label_fix(p->where, false);
152
153 return 1;
154 }
155
156 int mount_setup_early(void) {
157 unsigned i;
158 int r = 0;
159
160 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
161
162 /* Do a minimal mount of /proc and friends to enable the most
163 * basic stuff, such as SELinux */
164 for (i = 0; i < N_EARLY_MOUNT; i ++) {
165 int j;
166
167 j = mount_one(mount_table + i, false);
168 if (r == 0)
169 r = j;
170 }
171
172 return r;
173 }
174
175 int mount_cgroup_controllers(char ***join_controllers) {
176 int r;
177 FILE *f;
178 char buf[LINE_MAX];
179 Set *controllers;
180
181 /* Mount all available cgroup controllers that are built into the kernel. */
182
183 f = fopen("/proc/cgroups", "re");
184 if (!f) {
185 log_error("Failed to enumerate cgroup controllers: %m");
186 return 0;
187 }
188
189 controllers = set_new(string_hash_func, string_compare_func);
190 if (!controllers) {
191 r = -ENOMEM;
192 log_error("Failed to allocate controller set.");
193 goto finish;
194 }
195
196 /* Ignore the header line */
197 (void) fgets(buf, sizeof(buf), f);
198
199 for (;;) {
200 char *controller;
201 int enabled = 0;
202
203 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
204
205 if (feof(f))
206 break;
207
208 log_error("Failed to parse /proc/cgroups.");
209 r = -EIO;
210 goto finish;
211 }
212
213 if (!enabled) {
214 free(controller);
215 continue;
216 }
217
218 r = set_put(controllers, controller);
219 if (r < 0) {
220 log_error("Failed to add controller to set.");
221 free(controller);
222 goto finish;
223 }
224 }
225
226 for (;;) {
227 MountPoint p;
228 char *controller, *where, *options;
229 char ***k = NULL;
230
231 controller = set_steal_first(controllers);
232 if (!controller)
233 break;
234
235 if (join_controllers)
236 for (k = join_controllers; *k; k++)
237 if (strv_find(*k, controller))
238 break;
239
240 if (k && *k) {
241 char **i, **j;
242
243 for (i = *k, j = *k; *i; i++) {
244
245 if (!streq(*i, controller)) {
246 char *t;
247
248 t = set_remove(controllers, *i);
249 if (!t) {
250 free(*i);
251 continue;
252 }
253 free(t);
254 }
255
256 *(j++) = *i;
257 }
258
259 *j = NULL;
260
261 options = strv_join(*k, ",");
262 if (!options) {
263 log_error("Failed to join options");
264 free(controller);
265 r = -ENOMEM;
266 goto finish;
267 }
268
269 } else {
270 options = controller;
271 controller = NULL;
272 }
273
274 where = strappend("/sys/fs/cgroup/", options);
275 if (!where) {
276 log_error("Failed to build path");
277 free(options);
278 r = -ENOMEM;
279 goto finish;
280 }
281
282 zero(p);
283 p.what = "cgroup";
284 p.where = where;
285 p.type = "cgroup";
286 p.options = options;
287 p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
288 p.fatal = false;
289
290 r = mount_one(&p, true);
291 free(controller);
292 free(where);
293
294 if (r < 0) {
295 free(options);
296 goto finish;
297 }
298
299 if (r > 0 && k && *k) {
300 char **i;
301
302 for (i = *k; *i; i++) {
303 char *t;
304
305 t = strappend("/sys/fs/cgroup/", *i);
306 if (!t) {
307 log_error("Failed to build path");
308 r = -ENOMEM;
309 free(options);
310 goto finish;
311 }
312
313 r = symlink(options, t);
314 free(t);
315
316 if (r < 0 && errno != EEXIST) {
317 log_error("Failed to create symlink: %m");
318 r = -errno;
319 free(options);
320 goto finish;
321 }
322 }
323 }
324
325 free(options);
326 }
327
328 r = 0;
329
330 finish:
331 set_free_free(controllers);
332
333 fclose(f);
334
335 return r;
336 }
337
338 static int nftw_cb(
339 const char *fpath,
340 const struct stat *sb,
341 int tflag,
342 struct FTW *ftwbuf) {
343
344 /* No need to label /dev twice in a row... */
345 if (_unlikely_(ftwbuf->level == 0))
346 return FTW_CONTINUE;
347
348 label_fix(fpath, true);
349
350 /* /run/initramfs is static data and big, no need to
351 * dynamically relabel its contents at boot... */
352 if (_unlikely_(ftwbuf->level == 1 &&
353 tflag == FTW_D &&
354 streq(fpath, "/run/initramfs")))
355 return FTW_SKIP_SUBTREE;
356
357 return FTW_CONTINUE;
358 };
359
360 int mount_setup(bool loaded_policy) {
361
362 static const char relabel[] =
363 "/run/initramfs/root-fsck\0"
364 "/run/initramfs/shutdown\0";
365
366 int r;
367 unsigned i;
368 const char *j;
369
370 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
371 r = mount_one(mount_table + i, true);
372
373 if (r < 0)
374 return r;
375 }
376
377 /* Nodes in devtmpfs and /run need to be manually updated for
378 * the appropriate labels, after mounting. The other virtual
379 * API file systems like /sys and /proc do not need that, they
380 * use the same label for all their files. */
381 if (loaded_policy) {
382 usec_t before_relabel, after_relabel;
383 char timespan[FORMAT_TIMESPAN_MAX];
384
385 before_relabel = now(CLOCK_MONOTONIC);
386
387 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
388 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
389
390 /* Explicitly relabel these */
391 NULSTR_FOREACH(j, relabel)
392 label_fix(j, true);
393
394 after_relabel = now(CLOCK_MONOTONIC);
395
396 log_info("Relabelled /dev and /run in %s.",
397 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel));
398 }
399
400 /* Create a few default symlinks, which are normally created
401 * by udevd, but some scripts might need them before we start
402 * udevd. */
403 dev_setup();
404
405 /* Create a few directories we always want around */
406 label_mkdir("/run/systemd", 0755);
407 label_mkdir("/run/systemd/system", 0755);
408
409 return 0;
410 }