]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/namespace.c
busctl: when monitoring the bus, enable all credentials
[thirdparty/systemd.git] / src / core / namespace.c
CommitLineData
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22#include <errno.h>
23#include <sys/mount.h>
24#include <string.h>
25#include <stdio.h>
26#include <unistd.h>
27#include <sys/stat.h>
28#include <sys/types.h>
29#include <sched.h>
30#include <sys/syscall.h>
31#include <limits.h>
25e870b5 32#include <linux/fs.h>
613b411c 33#include <sys/file.h>
15ae422b
LP
34
35#include "strv.h"
36#include "util.h"
9eb977db 37#include "path-util.h"
15ae422b
LP
38#include "namespace.h"
39#include "missing.h"
c17ec25e 40#include "execute.h"
613b411c 41#include "loopback-setup.h"
7f112f50
LP
42#include "mkdir.h"
43#include "dev-setup.h"
44#include "def.h"
15ae422b 45
/* Kind of mount to establish for one path inside the new mount namespace.
 *
 * This is ordered by priority! When the same path is listed more than once,
 * entries are sorted by this value and the first (lowest) one wins. */
typedef enum MountMode {
        INACCESSIBLE,           /* "/run/systemd/inaccessible" is bind mounted over the path, then remounted read-only */
        READONLY,               /* the path is bind mounted onto itself, then remounted read-only */
        PRIVATE_TMP,            /* the private temporary directory is bind mounted over the path */
        PRIVATE_VAR_TMP,        /* the private /var/tmp directory is bind mounted over the path */
        PRIVATE_DEV,            /* a fresh tmpfs with a minimal set of device nodes is mounted on /dev */
        READWRITE               /* the path is bind mounted onto itself and left writable */
} MountMode;
15ae422b 55
c17ec25e 56typedef struct BindMount {
15ae422b 57 const char *path;
c17ec25e 58 MountMode mode;
ac0930c8 59 bool done;
ea92ae33 60 bool ignore;
c17ec25e 61} BindMount;
15ae422b 62
c17ec25e 63static int append_mounts(BindMount **p, char **strv, MountMode mode) {
15ae422b
LP
64 char **i;
65
613b411c
LP
66 assert(p);
67
15ae422b
LP
68 STRV_FOREACH(i, strv) {
69
ea92ae33
MW
70 (*p)->ignore = false;
71
94828d2d 72 if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') {
ea92ae33
MW
73 (*p)->ignore = true;
74 (*i)++;
75 }
76
15ae422b
LP
77 if (!path_is_absolute(*i))
78 return -EINVAL;
79
80 (*p)->path = *i;
81 (*p)->mode = mode;
82 (*p)++;
83 }
84
85 return 0;
86}
87
c17ec25e
MS
88static int mount_path_compare(const void *a, const void *b) {
89 const BindMount *p = a, *q = b;
15ae422b
LP
90
91 if (path_equal(p->path, q->path)) {
92
93 /* If the paths are equal, check the mode */
94 if (p->mode < q->mode)
95 return -1;
96
97 if (p->mode > q->mode)
98 return 1;
99
100 return 0;
101 }
102
103 /* If the paths are not equal, then order prefixes first */
104 if (path_startswith(p->path, q->path))
105 return 1;
106
107 if (path_startswith(q->path, p->path))
108 return -1;
109
110 return 0;
111}
112
c17ec25e
MS
113static void drop_duplicates(BindMount *m, unsigned *n) {
114 BindMount *f, *t, *previous;
15ae422b 115
c17ec25e 116 assert(m);
15ae422b 117 assert(n);
15ae422b 118
c17ec25e 119 for (f = m, t = m, previous = NULL; f < m+*n; f++) {
15ae422b 120
ac0930c8 121 /* The first one wins */
15ae422b
LP
122 if (previous && path_equal(f->path, previous->path))
123 continue;
124
125 t->path = f->path;
126 t->mode = f->mode;
127
15ae422b
LP
128 previous = t;
129
130 t++;
131 }
132
c17ec25e 133 *n = t - m;
15ae422b
LP
134}
135
7f112f50
LP
136static int mount_dev(BindMount *m) {
137 static const char devnodes[] =
138 "/dev/null\0"
139 "/dev/zero\0"
140 "/dev/full\0"
141 "/dev/random\0"
142 "/dev/urandom\0"
143 "/dev/tty\0";
144
145 struct stat devnodes_stat[6] = {};
146 const char *d;
147 unsigned n = 0;
148 _cleanup_umask_ mode_t u;
149 int r;
150
151 assert(m);
152
153 u = umask(0000);
154
155 /* First: record device mode_t and dev_t */
156 NULSTR_FOREACH(d, devnodes) {
157 r = stat(d, &devnodes_stat[n]);
158 if (r < 0) {
159 if (errno != ENOENT)
160 return -errno;
161 } else {
162 if (!S_ISBLK(devnodes_stat[n].st_mode) &&
163 !S_ISCHR(devnodes_stat[n].st_mode))
164 return -EINVAL;
165 }
166
167 n++;
168 }
169
170 assert(n == ELEMENTSOF(devnodes_stat));
171
172 r = mount("tmpfs", "/dev", "tmpfs", MS_NOSUID|MS_STRICTATIME, "mode=755");
173 if (r < 0)
174 return m->ignore ? 0 : -errno;
175
176
177 mkdir_p("/dev/pts", 0755);
178
179 r = mount("devpts", "/dev/pts", "devpts", MS_NOSUID|MS_NOEXEC, "newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID));
180 if (r < 0)
181 return m->ignore ? 0 : -errno;
182
183 mkdir_p("/dev/shm", 0755);
184
185 r = mount("tmpfs", "/dev/shm", "tmpfs", MS_NOSUID|MS_NODEV|MS_STRICTATIME, "mode=1777");
186 if (r < 0)
187 return m->ignore ? 0 : -errno;
188
189 /* Second: actually create it */
190 n = 0;
191 NULSTR_FOREACH(d, devnodes) {
192 if (devnodes_stat[n].st_rdev == 0)
193 continue;
194
195 r = mknod(d, devnodes_stat[n].st_mode, devnodes_stat[n].st_rdev);
196 if (r < 0)
197 return m->ignore ? 0 : -errno;
198
199 n++;
200 }
201
202 dev_setup(NULL);
203
204 return 0;
205}
206
ac0930c8 207static int apply_mount(
c17ec25e 208 BindMount *m,
ac0930c8 209 const char *tmp_dir,
c17ec25e 210 const char *var_tmp_dir) {
ac0930c8 211
15ae422b 212 const char *what;
15ae422b 213 int r;
15ae422b 214
c17ec25e 215 assert(m);
15ae422b 216
c17ec25e 217 switch (m->mode) {
15ae422b 218
7f112f50
LP
219 case PRIVATE_DEV:
220 return mount_dev(m);
221
15ae422b 222 case INACCESSIBLE:
c17ec25e 223 what = "/run/systemd/inaccessible";
15ae422b
LP
224 break;
225
226 case READONLY:
15ae422b 227 case READWRITE:
c17ec25e 228 what = m->path;
15ae422b
LP
229 break;
230
ac0930c8
LP
231 case PRIVATE_TMP:
232 what = tmp_dir;
233 break;
234
235 case PRIVATE_VAR_TMP:
236 what = var_tmp_dir;
15ae422b 237 break;
e364ad06
LP
238
239 default:
240 assert_not_reached("Unknown mode");
15ae422b
LP
241 }
242
ac0930c8 243 assert(what);
15ae422b 244
c17ec25e 245 r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL);
ac0930c8 246 if (r >= 0)
c17ec25e 247 log_debug("Successfully mounted %s to %s", what, m->path);
ea92ae33
MW
248 else if (m->ignore && errno == ENOENT)
249 r = 0;
15ae422b 250
ac0930c8
LP
251 return r;
252}
15ae422b 253
c17ec25e 254static int make_read_only(BindMount *m) {
ac0930c8 255 int r;
15ae422b 256
c17ec25e 257 assert(m);
ac0930c8 258
c17ec25e 259 if (m->mode != INACCESSIBLE && m->mode != READONLY)
ac0930c8
LP
260 return 0;
261
c17ec25e 262 r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
ea92ae33 263 if (r < 0 && !(m->ignore && errno == ENOENT))
ac0930c8
LP
264 return -errno;
265
266 return 0;
15ae422b
LP
267}
268
613b411c
LP
269int setup_namespace(
270 char** read_write_dirs,
271 char** read_only_dirs,
272 char** inaccessible_dirs,
273 char* tmp_dir,
274 char* var_tmp_dir,
7f112f50 275 bool private_dev,
613b411c 276 unsigned mount_flags) {
15ae422b 277
7ff7394d 278 BindMount *m, *mounts = NULL;
613b411c 279 unsigned n;
c17ec25e 280 int r = 0;
15ae422b 281
613b411c 282 if (mount_flags == 0)
c17ec25e 283 mount_flags = MS_SHARED;
ac0930c8 284
d5a3f0ea
ZJS
285 if (unshare(CLONE_NEWNS) < 0)
286 return -errno;
15ae422b 287
613b411c
LP
288 n = !!tmp_dir + !!var_tmp_dir +
289 strv_length(read_write_dirs) +
290 strv_length(read_only_dirs) +
7f112f50
LP
291 strv_length(inaccessible_dirs) +
292 private_dev;
613b411c
LP
293
294 if (n > 0) {
7ff7394d 295 m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
613b411c
LP
296 r = append_mounts(&m, read_write_dirs, READWRITE);
297 if (r < 0)
298 return r;
299
300 r = append_mounts(&m, read_only_dirs, READONLY);
301 if (r < 0)
302 return r;
303
304 r = append_mounts(&m, inaccessible_dirs, INACCESSIBLE);
305 if (r < 0)
7ff7394d
ZJS
306 return r;
307
613b411c 308 if (tmp_dir) {
7ff7394d
ZJS
309 m->path = "/tmp";
310 m->mode = PRIVATE_TMP;
311 m++;
613b411c 312 }
7ff7394d 313
613b411c 314 if (var_tmp_dir) {
7ff7394d
ZJS
315 m->path = "/var/tmp";
316 m->mode = PRIVATE_VAR_TMP;
317 m++;
318 }
ac0930c8 319
7f112f50
LP
320 if (private_dev) {
321 m->path = "/dev";
322 m->mode = PRIVATE_DEV;
323 m++;
324 }
325
7ff7394d 326 assert(mounts + n == m);
ac0930c8 327
7ff7394d
ZJS
328 qsort(mounts, n, sizeof(BindMount), mount_path_compare);
329 drop_duplicates(mounts, &n);
15ae422b
LP
330 }
331
ac0930c8 332 /* Remount / as SLAVE so that nothing now mounted in the namespace
dc4b0200 333 shows up in the parent */
d5a3f0ea
ZJS
334 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
335 return -errno;
15ae422b 336
c17ec25e
MS
337 for (m = mounts; m < mounts + n; ++m) {
338 r = apply_mount(m, tmp_dir, var_tmp_dir);
c1d70f7c 339 if (r < 0)
613b411c 340 goto fail;
c1d70f7c 341 }
15ae422b 342
c17ec25e
MS
343 for (m = mounts; m < mounts + n; ++m) {
344 r = make_read_only(m);
ac0930c8 345 if (r < 0)
613b411c 346 goto fail;
15ae422b
LP
347 }
348
ac0930c8 349 /* Remount / as the desired mode */
c17ec25e 350 if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
15ae422b 351 r = -errno;
613b411c 352 goto fail;
15ae422b
LP
353 }
354
15ae422b
LP
355 return 0;
356
613b411c
LP
357fail:
358 for (m = mounts; m < mounts + n; ++m)
c17ec25e
MS
359 if (m->done)
360 umount2(m->path, MNT_DETACH);
613b411c
LP
361
362 return r;
363}
364
365static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {
366 _cleanup_free_ char *x = NULL;
6b46ea73
LP
367 char bid[SD_ID128_STRING_MAX];
368 sd_id128_t boot_id;
369 int r;
613b411c
LP
370
371 assert(id);
372 assert(prefix);
373 assert(path);
374
6b46ea73
LP
375 /* We include the boot id in the directory so that after a
376 * reboot we can easily identify obsolete directories. */
377
378 r = sd_id128_get_boot(&boot_id);
379 if (r < 0)
380 return r;
381
382 x = strjoin(prefix, "/systemd-private-", sd_id128_to_string(boot_id, bid), "-", id, "-XXXXXX", NULL);
613b411c
LP
383 if (!x)
384 return -ENOMEM;
385
386 RUN_WITH_UMASK(0077)
387 if (!mkdtemp(x))
388 return -errno;
389
390 RUN_WITH_UMASK(0000) {
391 char *y;
392
393 y = strappenda(x, "/tmp");
394
395 if (mkdir(y, 0777 | S_ISVTX) < 0)
396 return -errno;
c17ec25e 397 }
15ae422b 398
613b411c
LP
399 *path = x;
400 x = NULL;
401
402 return 0;
403}
404
/* Creates the pair of private temporary directories for id, one below /tmp
 * and one below /var/tmp.
 *
 * tmp_dir, var_tmp_dir  on success receive the newly allocated paths of the
 *                       two directories. Both are left untouched on failure.
 *
 * Returns 0 on success. If the second directory cannot be created, the
 * first one is removed again and the error is returned. */
int setup_tmp_dirs(const char *id, char **tmp_dir, char **var_tmp_dir) {
        char *private_tmp, *private_var_tmp;
        char *inner;
        int r;

        assert(id);
        assert(tmp_dir);
        assert(var_tmp_dir);

        r = setup_one_tmp_dir(id, "/tmp", &private_tmp);
        if (r < 0)
                return r;

        r = setup_one_tmp_dir(id, "/var/tmp", &private_var_tmp);
        if (r >= 0) {
                *tmp_dir = private_tmp;
                *var_tmp_dir = private_var_tmp;

                return 0;
        }

        /* Roll back the first directory: the inner "tmp" first, then the outer one */
        inner = strappenda(private_tmp, "/tmp");
        rmdir(inner);
        rmdir(private_tmp);

        free(private_tmp);
        return r;
}
434
435int setup_netns(int netns_storage_socket[2]) {
436 _cleanup_close_ int netns = -1;
437 union {
438 struct cmsghdr cmsghdr;
439 uint8_t buf[CMSG_SPACE(sizeof(int))];
440 } control = {};
441 struct msghdr mh = {
442 .msg_control = &control,
443 .msg_controllen = sizeof(control),
444 };
445 struct cmsghdr *cmsg;
446 int r;
447
448 assert(netns_storage_socket);
449 assert(netns_storage_socket[0] >= 0);
450 assert(netns_storage_socket[1] >= 0);
451
452 /* We use the passed socketpair as a storage buffer for our
76cd584b
LP
453 * namespace reference fd. Whatever process runs this first
454 * shall create a new namespace, all others should just join
455 * it. To serialize that we use a file lock on the socket
456 * pair.
613b411c
LP
457 *
458 * It's a bit crazy, but hey, works great! */
459
460 if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0)
461 return -errno;
462
463 if (recvmsg(netns_storage_socket[0], &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) < 0) {
464 if (errno != EAGAIN) {
465 r = -errno;
466 goto fail;
467 }
468
469 /* Nothing stored yet, so let's create a new namespace */
470
471 if (unshare(CLONE_NEWNET) < 0) {
472 r = -errno;
473 goto fail;
474 }
475
476 loopback_setup();
477
478 netns = open("/proc/self/ns/net", O_RDONLY|O_CLOEXEC|O_NOCTTY);
479 if (netns < 0) {
480 r = -errno;
481 goto fail;
482 }
483
484 r = 1;
485 } else {
486 /* Yay, found something, so let's join the namespace */
487
488 for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg)) {
489 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
490 assert(cmsg->cmsg_len == CMSG_LEN(sizeof(int)));
491 netns = *(int*) CMSG_DATA(cmsg);
492 }
493 }
494
495 if (setns(netns, CLONE_NEWNET) < 0) {
496 r = -errno;
497 goto fail;
498 }
499
500 r = 0;
501 }
502
503 cmsg = CMSG_FIRSTHDR(&mh);
504 cmsg->cmsg_level = SOL_SOCKET;
505 cmsg->cmsg_type = SCM_RIGHTS;
506 cmsg->cmsg_len = CMSG_LEN(sizeof(int));
507 memcpy(CMSG_DATA(cmsg), &netns, sizeof(int));
508 mh.msg_controllen = cmsg->cmsg_len;
509
510 if (sendmsg(netns_storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL) < 0) {
511 r = -errno;
512 goto fail;
513 }
514
515fail:
516 lockf(netns_storage_socket[0], F_ULOCK, 0);
517
15ae422b
LP
518 return r;
519}