]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/namespace.c
core: make sure PrivateDevices= makes /dev/log available
[thirdparty/systemd.git] / src / core / namespace.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
15ae422b
LP
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
15ae422b
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
15ae422b 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
15ae422b
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <errno.h>
23#include <sys/mount.h>
24#include <string.h>
25#include <stdio.h>
26#include <unistd.h>
27#include <sys/stat.h>
28#include <sys/types.h>
29#include <sched.h>
30#include <sys/syscall.h>
31#include <limits.h>
25e870b5 32#include <linux/fs.h>
613b411c 33#include <sys/file.h>
15ae422b
LP
34
35#include "strv.h"
36#include "util.h"
9eb977db 37#include "path-util.h"
15ae422b
LP
38#include "namespace.h"
39#include "missing.h"
c17ec25e 40#include "execute.h"
613b411c 41#include "loopback-setup.h"
7f112f50
LP
42#include "mkdir.h"
43#include "dev-setup.h"
44#include "def.h"
15ae422b 45
/* How a single path is treated inside the private mount namespace.
 *
 * The numeric order is a priority order: mount_path_compare() sorts
 * entries for the same path by this value and drop_duplicates() keeps
 * the first one, so a smaller value wins over a larger one. */
typedef enum MountMode {
        INACCESSIBLE    = 0, /* covered by /run/systemd/inaccessible, then remounted read-only */
        READONLY        = 1, /* bind-mounted onto itself, then remounted read-only */
        PRIVATE_TMP     = 2, /* the private tmp directory is bind-mounted over the path */
        PRIVATE_VAR_TMP = 3, /* the private var-tmp directory is bind-mounted over the path */
        PRIVATE_DEV     = 4, /* replaced by the minimal /dev built by mount_dev() */
        READWRITE       = 5  /* bind-mounted onto itself and left writable */
} MountMode;
15ae422b 55
c17ec25e 56typedef struct BindMount {
15ae422b 57 const char *path;
c17ec25e 58 MountMode mode;
ac0930c8 59 bool done;
ea92ae33 60 bool ignore;
c17ec25e 61} BindMount;
15ae422b 62
c17ec25e 63static int append_mounts(BindMount **p, char **strv, MountMode mode) {
15ae422b
LP
64 char **i;
65
613b411c
LP
66 assert(p);
67
15ae422b
LP
68 STRV_FOREACH(i, strv) {
69
ea92ae33
MW
70 (*p)->ignore = false;
71
94828d2d 72 if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') {
ea92ae33
MW
73 (*p)->ignore = true;
74 (*i)++;
75 }
76
15ae422b
LP
77 if (!path_is_absolute(*i))
78 return -EINVAL;
79
80 (*p)->path = *i;
81 (*p)->mode = mode;
82 (*p)++;
83 }
84
85 return 0;
86}
87
c17ec25e
MS
88static int mount_path_compare(const void *a, const void *b) {
89 const BindMount *p = a, *q = b;
15ae422b
LP
90
91 if (path_equal(p->path, q->path)) {
92
93 /* If the paths are equal, check the mode */
94 if (p->mode < q->mode)
95 return -1;
96
97 if (p->mode > q->mode)
98 return 1;
99
100 return 0;
101 }
102
103 /* If the paths are not equal, then order prefixes first */
104 if (path_startswith(p->path, q->path))
105 return 1;
106
107 if (path_startswith(q->path, p->path))
108 return -1;
109
110 return 0;
111}
112
c17ec25e
MS
113static void drop_duplicates(BindMount *m, unsigned *n) {
114 BindMount *f, *t, *previous;
15ae422b 115
c17ec25e 116 assert(m);
15ae422b 117 assert(n);
15ae422b 118
c17ec25e 119 for (f = m, t = m, previous = NULL; f < m+*n; f++) {
15ae422b 120
ac0930c8 121 /* The first one wins */
15ae422b
LP
122 if (previous && path_equal(f->path, previous->path))
123 continue;
124
125 t->path = f->path;
126 t->mode = f->mode;
127
15ae422b
LP
128 previous = t;
129
130 t++;
131 }
132
c17ec25e 133 *n = t - m;
15ae422b
LP
134}
135
7f112f50
LP
136static int mount_dev(BindMount *m) {
137 static const char devnodes[] =
138 "/dev/null\0"
139 "/dev/zero\0"
140 "/dev/full\0"
141 "/dev/random\0"
142 "/dev/urandom\0"
143 "/dev/tty\0";
144
2b85f4e1 145 char temporary_mount[] = "/tmp/namespace-dev-XXXXXX";
82d25240 146 const char *d, *dev = NULL, *devpts = NULL, *devshm = NULL, *devkdbus = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL;
7f112f50
LP
147 _cleanup_umask_ mode_t u;
148 int r;
149
150 assert(m);
151
152 u = umask(0000);
153
2b85f4e1
LP
154 if (!mkdtemp(temporary_mount))
155 return -errno;
156
157 dev = strappenda(temporary_mount, "/dev");
158 mkdir(dev, 0755);
159 if (mount("tmpfs", dev, "tmpfs", MS_NOSUID|MS_STRICTATIME, "mode=755") < 0) {
160 r = -errno;
161 goto fail;
162 }
163
164 devpts = strappenda(temporary_mount, "/dev/pts");
165 mkdir(devpts, 0755);
166 if (mount("/dev/pts", devpts, NULL, MS_BIND, NULL) < 0) {
167 r = -errno;
168 goto fail;
169 }
170
171 devshm = strappenda(temporary_mount, "/dev/shm");
172 mkdir(devshm, 01777);
173 r = mount("/dev/shm", devshm, NULL, MS_BIND, NULL);
174 if (r < 0) {
175 r = -errno;
176 goto fail;
177 }
178
179 devmqueue = strappenda(temporary_mount, "/dev/mqueue");
180 mkdir(devmqueue, 0755);
181 mount("/dev/mqueue", devmqueue, NULL, MS_BIND, NULL);
182
183 devkdbus = strappenda(temporary_mount, "/dev/kdbus");
184 mkdir(devkdbus, 0755);
185 mount("/dev/kdbus", devkdbus, NULL, MS_BIND, NULL);
186
187 devhugepages = strappenda(temporary_mount, "/dev/hugepages");
188 mkdir(devhugepages, 0755);
189 mount("/dev/hugepages", devhugepages, NULL, MS_BIND, NULL);
190
82d25240
LP
191 devlog = strappenda(temporary_mount, "/dev/log");
192 symlink("/run/systemd/journal/dev-log", devlog);
193
7f112f50 194 NULSTR_FOREACH(d, devnodes) {
2b85f4e1
LP
195 _cleanup_free_ char *dn = NULL;
196 struct stat st;
197
198 r = stat(d, &st);
7f112f50 199 if (r < 0) {
2b85f4e1
LP
200
201 if (errno == ENOENT)
202 continue;
203
204 r = -errno;
205 goto fail;
7f112f50
LP
206 }
207
2b85f4e1
LP
208 if (!S_ISBLK(st.st_mode) &&
209 !S_ISCHR(st.st_mode)) {
210 r = -EINVAL;
211 goto fail;
212 }
213
214 if (st.st_rdev == 0)
215 continue;
216
217 dn = strappend(temporary_mount, d);
218 if (!dn) {
219 r = -ENOMEM;
220 goto fail;
221 }
222
223 r = mknod(dn, st.st_mode, st.st_rdev);
224 if (r < 0) {
225 r = -errno;
226 goto fail;
227 }
7f112f50
LP
228 }
229
2b85f4e1 230 dev_setup(temporary_mount);
7f112f50 231
2b85f4e1
LP
232 if (mount(dev, "/dev/", NULL, MS_MOVE, NULL) < 0) {
233 r = -errno;
234 goto fail;
235 }
7f112f50 236
2b85f4e1
LP
237 rmdir(dev);
238 rmdir(temporary_mount);
7f112f50 239
2b85f4e1 240 return 0;
7f112f50 241
2b85f4e1
LP
242fail:
243 if (devpts)
244 umount(devpts);
7f112f50 245
2b85f4e1
LP
246 if (devshm)
247 umount(devshm);
7f112f50 248
2b85f4e1
LP
249 if (devkdbus)
250 umount(devkdbus);
7f112f50 251
2b85f4e1
LP
252 if (devhugepages)
253 umount(devhugepages);
7f112f50 254
2b85f4e1
LP
255 if (devmqueue)
256 umount(devmqueue);
7f112f50 257
2b85f4e1
LP
258 if (dev) {
259 umount(dev);
260 rmdir(dev);
7f112f50
LP
261 }
262
2b85f4e1 263 rmdir(temporary_mount);
7f112f50 264
2b85f4e1 265 return r;
7f112f50
LP
266}
267
ac0930c8 268static int apply_mount(
c17ec25e 269 BindMount *m,
ac0930c8 270 const char *tmp_dir,
c17ec25e 271 const char *var_tmp_dir) {
ac0930c8 272
15ae422b 273 const char *what;
15ae422b 274 int r;
15ae422b 275
c17ec25e 276 assert(m);
15ae422b 277
c17ec25e 278 switch (m->mode) {
15ae422b 279
7f112f50
LP
280 case PRIVATE_DEV:
281 return mount_dev(m);
282
15ae422b 283 case INACCESSIBLE:
c17ec25e 284 what = "/run/systemd/inaccessible";
15ae422b
LP
285 break;
286
287 case READONLY:
15ae422b 288 case READWRITE:
c17ec25e 289 what = m->path;
15ae422b
LP
290 break;
291
ac0930c8
LP
292 case PRIVATE_TMP:
293 what = tmp_dir;
294 break;
295
296 case PRIVATE_VAR_TMP:
297 what = var_tmp_dir;
15ae422b 298 break;
e364ad06
LP
299
300 default:
301 assert_not_reached("Unknown mode");
15ae422b
LP
302 }
303
ac0930c8 304 assert(what);
15ae422b 305
c17ec25e 306 r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL);
ac0930c8 307 if (r >= 0)
c17ec25e 308 log_debug("Successfully mounted %s to %s", what, m->path);
ea92ae33
MW
309 else if (m->ignore && errno == ENOENT)
310 r = 0;
15ae422b 311
ac0930c8
LP
312 return r;
313}
15ae422b 314
c17ec25e 315static int make_read_only(BindMount *m) {
ac0930c8 316 int r;
15ae422b 317
c17ec25e 318 assert(m);
ac0930c8 319
c17ec25e 320 if (m->mode != INACCESSIBLE && m->mode != READONLY)
ac0930c8
LP
321 return 0;
322
c17ec25e 323 r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
ea92ae33 324 if (r < 0 && !(m->ignore && errno == ENOENT))
ac0930c8
LP
325 return -errno;
326
327 return 0;
15ae422b
LP
328}
329
613b411c
LP
330int setup_namespace(
331 char** read_write_dirs,
332 char** read_only_dirs,
333 char** inaccessible_dirs,
334 char* tmp_dir,
335 char* var_tmp_dir,
7f112f50 336 bool private_dev,
417116f2
LP
337 ProtectedHome protected_home,
338 bool read_only_system,
613b411c 339 unsigned mount_flags) {
15ae422b 340
7ff7394d 341 BindMount *m, *mounts = NULL;
613b411c 342 unsigned n;
c17ec25e 343 int r = 0;
15ae422b 344
613b411c 345 if (mount_flags == 0)
c17ec25e 346 mount_flags = MS_SHARED;
ac0930c8 347
d5a3f0ea
ZJS
348 if (unshare(CLONE_NEWNS) < 0)
349 return -errno;
15ae422b 350
613b411c
LP
351 n = !!tmp_dir + !!var_tmp_dir +
352 strv_length(read_write_dirs) +
353 strv_length(read_only_dirs) +
7f112f50 354 strv_length(inaccessible_dirs) +
417116f2
LP
355 private_dev +
356 (protected_home != PROTECTED_HOME_NO ? 2 : 0) +
357 (read_only_system ? 2 : 0);
613b411c
LP
358
359 if (n > 0) {
7ff7394d 360 m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
613b411c
LP
361 r = append_mounts(&m, read_write_dirs, READWRITE);
362 if (r < 0)
363 return r;
364
365 r = append_mounts(&m, read_only_dirs, READONLY);
366 if (r < 0)
367 return r;
368
369 r = append_mounts(&m, inaccessible_dirs, INACCESSIBLE);
370 if (r < 0)
7ff7394d
ZJS
371 return r;
372
613b411c 373 if (tmp_dir) {
7ff7394d
ZJS
374 m->path = "/tmp";
375 m->mode = PRIVATE_TMP;
376 m++;
613b411c 377 }
7ff7394d 378
613b411c 379 if (var_tmp_dir) {
7ff7394d
ZJS
380 m->path = "/var/tmp";
381 m->mode = PRIVATE_VAR_TMP;
382 m++;
383 }
ac0930c8 384
7f112f50
LP
385 if (private_dev) {
386 m->path = "/dev";
387 m->mode = PRIVATE_DEV;
388 m++;
389 }
390
417116f2
LP
391 if (protected_home != PROTECTED_HOME_NO) {
392 r = append_mounts(&m, STRV_MAKE("-/home", "-/run/user"), protected_home == PROTECTED_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
393 if (r < 0)
394 return r;
395 }
396
397 if (read_only_system) {
398 r = append_mounts(&m, STRV_MAKE("/usr", "-/boot"), READONLY);
399 if (r < 0)
400 return r;
401 }
402
7ff7394d 403 assert(mounts + n == m);
ac0930c8 404
7ff7394d
ZJS
405 qsort(mounts, n, sizeof(BindMount), mount_path_compare);
406 drop_duplicates(mounts, &n);
15ae422b
LP
407 }
408
c2c13f2d
LP
409 if (n > 0) {
410 /* Remount / as SLAVE so that nothing now mounted in the namespace
411 shows up in the parent */
412 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
413 return -errno;
414
415 for (m = mounts; m < mounts + n; ++m) {
416 r = apply_mount(m, tmp_dir, var_tmp_dir);
417 if (r < 0)
418 goto fail;
419 }
15ae422b 420
c2c13f2d
LP
421 for (m = mounts; m < mounts + n; ++m) {
422 r = make_read_only(m);
423 if (r < 0)
424 goto fail;
425 }
15ae422b
LP
426 }
427
c2c13f2d
LP
428 /* Remount / as the desired mode. Not that this will not
429 * reestablish propagation from our side to the host, since
430 * what's disconnected is disconnected. */
c17ec25e 431 if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
15ae422b 432 r = -errno;
613b411c 433 goto fail;
15ae422b
LP
434 }
435
15ae422b
LP
436 return 0;
437
613b411c 438fail:
c2c13f2d
LP
439 if (n > 0) {
440 for (m = mounts; m < mounts + n; ++m)
441 if (m->done)
442 umount2(m->path, MNT_DETACH);
443 }
613b411c
LP
444
445 return r;
446}
447
448static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {
449 _cleanup_free_ char *x = NULL;
6b46ea73
LP
450 char bid[SD_ID128_STRING_MAX];
451 sd_id128_t boot_id;
452 int r;
613b411c
LP
453
454 assert(id);
455 assert(prefix);
456 assert(path);
457
6b46ea73
LP
458 /* We include the boot id in the directory so that after a
459 * reboot we can easily identify obsolete directories. */
460
461 r = sd_id128_get_boot(&boot_id);
462 if (r < 0)
463 return r;
464
465 x = strjoin(prefix, "/systemd-private-", sd_id128_to_string(boot_id, bid), "-", id, "-XXXXXX", NULL);
613b411c
LP
466 if (!x)
467 return -ENOMEM;
468
469 RUN_WITH_UMASK(0077)
470 if (!mkdtemp(x))
471 return -errno;
472
473 RUN_WITH_UMASK(0000) {
474 char *y;
475
476 y = strappenda(x, "/tmp");
477
478 if (mkdir(y, 0777 | S_ISVTX) < 0)
479 return -errno;
c17ec25e 480 }
15ae422b 481
613b411c
LP
482 *path = x;
483 x = NULL;
484
485 return 0;
486}
487
/* Creates the pair of private temporary directories for the unit
 * identified by id, one below /tmp and one below /var/tmp.
 *
 * On success 0 is returned and *tmp_dir and *var_tmp_dir receive newly
 * allocated paths owned by the caller. On failure a negative
 * errno-style code is returned, the outputs are left untouched and an
 * already created /tmp directory is removed again. */
int setup_tmp_dirs(const char *id, char **tmp_dir, char **var_tmp_dir) {
        char *private_tmp, *private_var_tmp;
        int r;

        assert(id);
        assert(tmp_dir);
        assert(var_tmp_dir);

        r = setup_one_tmp_dir(id, "/tmp", &private_tmp);
        if (r < 0)
                return r;

        r = setup_one_tmp_dir(id, "/var/tmp", &private_var_tmp);
        if (r >= 0) {
                *tmp_dir = private_tmp;
                *var_tmp_dir = private_var_tmp;

                return 0;
        }

        /* The second directory could not be created: roll back the
         * first one, inner "tmp" directory first. */
        {
                char *inner;

                inner = strappenda(private_tmp, "/tmp");
                rmdir(inner);
        }

        rmdir(private_tmp);
        free(private_tmp);

        return r;
}
517
518int setup_netns(int netns_storage_socket[2]) {
519 _cleanup_close_ int netns = -1;
520 union {
521 struct cmsghdr cmsghdr;
522 uint8_t buf[CMSG_SPACE(sizeof(int))];
523 } control = {};
524 struct msghdr mh = {
525 .msg_control = &control,
526 .msg_controllen = sizeof(control),
527 };
528 struct cmsghdr *cmsg;
529 int r;
530
531 assert(netns_storage_socket);
532 assert(netns_storage_socket[0] >= 0);
533 assert(netns_storage_socket[1] >= 0);
534
535 /* We use the passed socketpair as a storage buffer for our
76cd584b
LP
536 * namespace reference fd. Whatever process runs this first
537 * shall create a new namespace, all others should just join
538 * it. To serialize that we use a file lock on the socket
539 * pair.
613b411c
LP
540 *
541 * It's a bit crazy, but hey, works great! */
542
543 if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0)
544 return -errno;
545
546 if (recvmsg(netns_storage_socket[0], &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) < 0) {
547 if (errno != EAGAIN) {
548 r = -errno;
549 goto fail;
550 }
551
552 /* Nothing stored yet, so let's create a new namespace */
553
554 if (unshare(CLONE_NEWNET) < 0) {
555 r = -errno;
556 goto fail;
557 }
558
559 loopback_setup();
560
561 netns = open("/proc/self/ns/net", O_RDONLY|O_CLOEXEC|O_NOCTTY);
562 if (netns < 0) {
563 r = -errno;
564 goto fail;
565 }
566
567 r = 1;
568 } else {
569 /* Yay, found something, so let's join the namespace */
570
571 for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg)) {
572 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
573 assert(cmsg->cmsg_len == CMSG_LEN(sizeof(int)));
574 netns = *(int*) CMSG_DATA(cmsg);
575 }
576 }
577
578 if (setns(netns, CLONE_NEWNET) < 0) {
579 r = -errno;
580 goto fail;
581 }
582
583 r = 0;
584 }
585
586 cmsg = CMSG_FIRSTHDR(&mh);
587 cmsg->cmsg_level = SOL_SOCKET;
588 cmsg->cmsg_type = SCM_RIGHTS;
589 cmsg->cmsg_len = CMSG_LEN(sizeof(int));
590 memcpy(CMSG_DATA(cmsg), &netns, sizeof(int));
591 mh.msg_controllen = cmsg->cmsg_len;
592
593 if (sendmsg(netns_storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL) < 0) {
594 r = -errno;
595 goto fail;
596 }
597
598fail:
599 lockf(netns_storage_socket[0], F_ULOCK, 0);
600
15ae422b
LP
601 return r;
602}
417116f2
LP
603
604static const char *const protected_home_table[_PROTECTED_HOME_MAX] = {
605 [PROTECTED_HOME_NO] = "no",
606 [PROTECTED_HOME_YES] = "yes",
607 [PROTECTED_HOME_READ_ONLY] = "read-only",
608};
609
610DEFINE_STRING_TABLE_LOOKUP(protected_home, ProtectedHome);