]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/namespace.c
hwdb: fix case-sensitive match
[thirdparty/systemd.git] / src / core / namespace.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
15ae422b
LP
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
15ae422b
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
15ae422b 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
15ae422b
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <errno.h>
23#include <sys/mount.h>
24#include <string.h>
25#include <stdio.h>
26#include <unistd.h>
27#include <sys/stat.h>
28#include <sys/types.h>
29#include <sched.h>
30#include <sys/syscall.h>
31#include <limits.h>
25e870b5 32#include <linux/fs.h>
613b411c 33#include <sys/file.h>
15ae422b
LP
34
35#include "strv.h"
36#include "util.h"
9eb977db 37#include "path-util.h"
15ae422b
LP
38#include "namespace.h"
39#include "missing.h"
c17ec25e 40#include "execute.h"
613b411c 41#include "loopback-setup.h"
7f112f50
LP
42#include "mkdir.h"
43#include "dev-setup.h"
44#include "def.h"
15ae422b 45
/* How a single path is treated inside the private mount namespace.
 *
 * The numeric order is significant: when the same path is listed with
 * several modes, the entries are sorted by mode and only the first one
 * (the lowest value) is kept. */
typedef enum MountMode {
        /* This is ordered by priority! */
        INACCESSIBLE,           /* covered by /run/systemd/inaccessible, then remounted read-only */
        READONLY,               /* bind mounted onto itself, then remounted read-only */
        PRIVATE_TMP,            /* the private tmp_dir is bind mounted here */
        PRIVATE_VAR_TMP,        /* the private var_tmp_dir is bind mounted here */
        PRIVATE_DEV,            /* replaced by a freshly populated tmpfs */
        READWRITE               /* bind mounted onto itself, left writable */
} MountMode;
15ae422b 55
c17ec25e 56typedef struct BindMount {
15ae422b 57 const char *path;
c17ec25e 58 MountMode mode;
ac0930c8 59 bool done;
ea92ae33 60 bool ignore;
c17ec25e 61} BindMount;
15ae422b 62
c17ec25e 63static int append_mounts(BindMount **p, char **strv, MountMode mode) {
15ae422b
LP
64 char **i;
65
613b411c
LP
66 assert(p);
67
15ae422b
LP
68 STRV_FOREACH(i, strv) {
69
ea92ae33
MW
70 (*p)->ignore = false;
71
94828d2d 72 if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') {
ea92ae33
MW
73 (*p)->ignore = true;
74 (*i)++;
75 }
76
15ae422b
LP
77 if (!path_is_absolute(*i))
78 return -EINVAL;
79
80 (*p)->path = *i;
81 (*p)->mode = mode;
82 (*p)++;
83 }
84
85 return 0;
86}
87
c17ec25e
MS
88static int mount_path_compare(const void *a, const void *b) {
89 const BindMount *p = a, *q = b;
15ae422b
LP
90
91 if (path_equal(p->path, q->path)) {
92
93 /* If the paths are equal, check the mode */
94 if (p->mode < q->mode)
95 return -1;
96
97 if (p->mode > q->mode)
98 return 1;
99
100 return 0;
101 }
102
103 /* If the paths are not equal, then order prefixes first */
104 if (path_startswith(p->path, q->path))
105 return 1;
106
107 if (path_startswith(q->path, p->path))
108 return -1;
109
110 return 0;
111}
112
c17ec25e
MS
113static void drop_duplicates(BindMount *m, unsigned *n) {
114 BindMount *f, *t, *previous;
15ae422b 115
c17ec25e 116 assert(m);
15ae422b 117 assert(n);
15ae422b 118
c17ec25e 119 for (f = m, t = m, previous = NULL; f < m+*n; f++) {
15ae422b 120
ac0930c8 121 /* The first one wins */
15ae422b
LP
122 if (previous && path_equal(f->path, previous->path))
123 continue;
124
125 t->path = f->path;
126 t->mode = f->mode;
127
15ae422b
LP
128 previous = t;
129
130 t++;
131 }
132
c17ec25e 133 *n = t - m;
15ae422b
LP
134}
135
7f112f50
LP
136static int mount_dev(BindMount *m) {
137 static const char devnodes[] =
138 "/dev/null\0"
139 "/dev/zero\0"
140 "/dev/full\0"
141 "/dev/random\0"
142 "/dev/urandom\0"
143 "/dev/tty\0";
144
2b85f4e1 145 char temporary_mount[] = "/tmp/namespace-dev-XXXXXX";
e06b6479 146 const char *d, *dev = NULL, *devpts = NULL, *devshm = NULL, *devkdbus = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL;
7f112f50
LP
147 _cleanup_umask_ mode_t u;
148 int r;
149
150 assert(m);
151
152 u = umask(0000);
153
2b85f4e1
LP
154 if (!mkdtemp(temporary_mount))
155 return -errno;
156
157 dev = strappenda(temporary_mount, "/dev");
158 mkdir(dev, 0755);
159 if (mount("tmpfs", dev, "tmpfs", MS_NOSUID|MS_STRICTATIME, "mode=755") < 0) {
160 r = -errno;
161 goto fail;
162 }
163
164 devpts = strappenda(temporary_mount, "/dev/pts");
165 mkdir(devpts, 0755);
166 if (mount("/dev/pts", devpts, NULL, MS_BIND, NULL) < 0) {
167 r = -errno;
168 goto fail;
169 }
170
e06b6479
LP
171 devptmx = strappenda(temporary_mount, "/dev/ptmx");
172 symlink("pts/ptmx", devptmx);
173
2b85f4e1
LP
174 devshm = strappenda(temporary_mount, "/dev/shm");
175 mkdir(devshm, 01777);
176 r = mount("/dev/shm", devshm, NULL, MS_BIND, NULL);
177 if (r < 0) {
178 r = -errno;
179 goto fail;
180 }
181
182 devmqueue = strappenda(temporary_mount, "/dev/mqueue");
183 mkdir(devmqueue, 0755);
184 mount("/dev/mqueue", devmqueue, NULL, MS_BIND, NULL);
185
186 devkdbus = strappenda(temporary_mount, "/dev/kdbus");
187 mkdir(devkdbus, 0755);
188 mount("/dev/kdbus", devkdbus, NULL, MS_BIND, NULL);
189
190 devhugepages = strappenda(temporary_mount, "/dev/hugepages");
191 mkdir(devhugepages, 0755);
192 mount("/dev/hugepages", devhugepages, NULL, MS_BIND, NULL);
193
82d25240
LP
194 devlog = strappenda(temporary_mount, "/dev/log");
195 symlink("/run/systemd/journal/dev-log", devlog);
196
7f112f50 197 NULSTR_FOREACH(d, devnodes) {
2b85f4e1
LP
198 _cleanup_free_ char *dn = NULL;
199 struct stat st;
200
201 r = stat(d, &st);
7f112f50 202 if (r < 0) {
2b85f4e1
LP
203
204 if (errno == ENOENT)
205 continue;
206
207 r = -errno;
208 goto fail;
7f112f50
LP
209 }
210
2b85f4e1
LP
211 if (!S_ISBLK(st.st_mode) &&
212 !S_ISCHR(st.st_mode)) {
213 r = -EINVAL;
214 goto fail;
215 }
216
217 if (st.st_rdev == 0)
218 continue;
219
220 dn = strappend(temporary_mount, d);
221 if (!dn) {
222 r = -ENOMEM;
223 goto fail;
224 }
225
226 r = mknod(dn, st.st_mode, st.st_rdev);
227 if (r < 0) {
228 r = -errno;
229 goto fail;
230 }
7f112f50
LP
231 }
232
2b85f4e1 233 dev_setup(temporary_mount);
7f112f50 234
2b85f4e1
LP
235 if (mount(dev, "/dev/", NULL, MS_MOVE, NULL) < 0) {
236 r = -errno;
237 goto fail;
238 }
7f112f50 239
2b85f4e1
LP
240 rmdir(dev);
241 rmdir(temporary_mount);
7f112f50 242
2b85f4e1 243 return 0;
7f112f50 244
2b85f4e1
LP
245fail:
246 if (devpts)
247 umount(devpts);
7f112f50 248
2b85f4e1
LP
249 if (devshm)
250 umount(devshm);
7f112f50 251
2b85f4e1
LP
252 if (devkdbus)
253 umount(devkdbus);
7f112f50 254
2b85f4e1
LP
255 if (devhugepages)
256 umount(devhugepages);
7f112f50 257
2b85f4e1
LP
258 if (devmqueue)
259 umount(devmqueue);
7f112f50 260
2b85f4e1
LP
261 if (dev) {
262 umount(dev);
263 rmdir(dev);
7f112f50
LP
264 }
265
2b85f4e1 266 rmdir(temporary_mount);
7f112f50 267
2b85f4e1 268 return r;
7f112f50
LP
269}
270
ac0930c8 271static int apply_mount(
c17ec25e 272 BindMount *m,
ac0930c8 273 const char *tmp_dir,
c17ec25e 274 const char *var_tmp_dir) {
ac0930c8 275
15ae422b 276 const char *what;
15ae422b 277 int r;
15ae422b 278
c17ec25e 279 assert(m);
15ae422b 280
c17ec25e 281 switch (m->mode) {
15ae422b 282
7f112f50
LP
283 case PRIVATE_DEV:
284 return mount_dev(m);
285
15ae422b 286 case INACCESSIBLE:
c17ec25e 287 what = "/run/systemd/inaccessible";
15ae422b
LP
288 break;
289
290 case READONLY:
15ae422b 291 case READWRITE:
c17ec25e 292 what = m->path;
15ae422b
LP
293 break;
294
ac0930c8
LP
295 case PRIVATE_TMP:
296 what = tmp_dir;
297 break;
298
299 case PRIVATE_VAR_TMP:
300 what = var_tmp_dir;
15ae422b 301 break;
e364ad06
LP
302
303 default:
304 assert_not_reached("Unknown mode");
15ae422b
LP
305 }
306
ac0930c8 307 assert(what);
15ae422b 308
c17ec25e 309 r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL);
ac0930c8 310 if (r >= 0)
c17ec25e 311 log_debug("Successfully mounted %s to %s", what, m->path);
ea92ae33
MW
312 else if (m->ignore && errno == ENOENT)
313 r = 0;
15ae422b 314
ac0930c8
LP
315 return r;
316}
15ae422b 317
c17ec25e 318static int make_read_only(BindMount *m) {
ac0930c8 319 int r;
15ae422b 320
c17ec25e 321 assert(m);
ac0930c8 322
c17ec25e 323 if (m->mode != INACCESSIBLE && m->mode != READONLY)
ac0930c8
LP
324 return 0;
325
c17ec25e 326 r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
ea92ae33 327 if (r < 0 && !(m->ignore && errno == ENOENT))
ac0930c8
LP
328 return -errno;
329
330 return 0;
15ae422b
LP
331}
332
613b411c
LP
333int setup_namespace(
334 char** read_write_dirs,
335 char** read_only_dirs,
336 char** inaccessible_dirs,
337 char* tmp_dir,
338 char* var_tmp_dir,
7f112f50 339 bool private_dev,
417116f2
LP
340 ProtectedHome protected_home,
341 bool read_only_system,
613b411c 342 unsigned mount_flags) {
15ae422b 343
7ff7394d 344 BindMount *m, *mounts = NULL;
613b411c 345 unsigned n;
c17ec25e 346 int r = 0;
15ae422b 347
613b411c 348 if (mount_flags == 0)
c17ec25e 349 mount_flags = MS_SHARED;
ac0930c8 350
d5a3f0ea
ZJS
351 if (unshare(CLONE_NEWNS) < 0)
352 return -errno;
15ae422b 353
613b411c
LP
354 n = !!tmp_dir + !!var_tmp_dir +
355 strv_length(read_write_dirs) +
356 strv_length(read_only_dirs) +
7f112f50 357 strv_length(inaccessible_dirs) +
417116f2
LP
358 private_dev +
359 (protected_home != PROTECTED_HOME_NO ? 2 : 0) +
360 (read_only_system ? 2 : 0);
613b411c
LP
361
362 if (n > 0) {
7ff7394d 363 m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
613b411c
LP
364 r = append_mounts(&m, read_write_dirs, READWRITE);
365 if (r < 0)
366 return r;
367
368 r = append_mounts(&m, read_only_dirs, READONLY);
369 if (r < 0)
370 return r;
371
372 r = append_mounts(&m, inaccessible_dirs, INACCESSIBLE);
373 if (r < 0)
7ff7394d
ZJS
374 return r;
375
613b411c 376 if (tmp_dir) {
7ff7394d
ZJS
377 m->path = "/tmp";
378 m->mode = PRIVATE_TMP;
379 m++;
613b411c 380 }
7ff7394d 381
613b411c 382 if (var_tmp_dir) {
7ff7394d
ZJS
383 m->path = "/var/tmp";
384 m->mode = PRIVATE_VAR_TMP;
385 m++;
386 }
ac0930c8 387
7f112f50
LP
388 if (private_dev) {
389 m->path = "/dev";
390 m->mode = PRIVATE_DEV;
391 m++;
392 }
393
417116f2
LP
394 if (protected_home != PROTECTED_HOME_NO) {
395 r = append_mounts(&m, STRV_MAKE("-/home", "-/run/user"), protected_home == PROTECTED_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
396 if (r < 0)
397 return r;
398 }
399
400 if (read_only_system) {
401 r = append_mounts(&m, STRV_MAKE("/usr", "-/boot"), READONLY);
402 if (r < 0)
403 return r;
404 }
405
7ff7394d 406 assert(mounts + n == m);
ac0930c8 407
7ff7394d
ZJS
408 qsort(mounts, n, sizeof(BindMount), mount_path_compare);
409 drop_duplicates(mounts, &n);
15ae422b
LP
410 }
411
c2c13f2d
LP
412 if (n > 0) {
413 /* Remount / as SLAVE so that nothing now mounted in the namespace
414 shows up in the parent */
415 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
416 return -errno;
417
418 for (m = mounts; m < mounts + n; ++m) {
419 r = apply_mount(m, tmp_dir, var_tmp_dir);
420 if (r < 0)
421 goto fail;
422 }
15ae422b 423
c2c13f2d
LP
424 for (m = mounts; m < mounts + n; ++m) {
425 r = make_read_only(m);
426 if (r < 0)
427 goto fail;
428 }
15ae422b
LP
429 }
430
c2c13f2d
LP
431 /* Remount / as the desired mode. Not that this will not
432 * reestablish propagation from our side to the host, since
433 * what's disconnected is disconnected. */
c17ec25e 434 if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
15ae422b 435 r = -errno;
613b411c 436 goto fail;
15ae422b
LP
437 }
438
15ae422b
LP
439 return 0;
440
613b411c 441fail:
c2c13f2d
LP
442 if (n > 0) {
443 for (m = mounts; m < mounts + n; ++m)
444 if (m->done)
445 umount2(m->path, MNT_DETACH);
446 }
613b411c
LP
447
448 return r;
449}
450
451static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {
452 _cleanup_free_ char *x = NULL;
6b46ea73
LP
453 char bid[SD_ID128_STRING_MAX];
454 sd_id128_t boot_id;
455 int r;
613b411c
LP
456
457 assert(id);
458 assert(prefix);
459 assert(path);
460
6b46ea73
LP
461 /* We include the boot id in the directory so that after a
462 * reboot we can easily identify obsolete directories. */
463
464 r = sd_id128_get_boot(&boot_id);
465 if (r < 0)
466 return r;
467
468 x = strjoin(prefix, "/systemd-private-", sd_id128_to_string(boot_id, bid), "-", id, "-XXXXXX", NULL);
613b411c
LP
469 if (!x)
470 return -ENOMEM;
471
472 RUN_WITH_UMASK(0077)
473 if (!mkdtemp(x))
474 return -errno;
475
476 RUN_WITH_UMASK(0000) {
477 char *y;
478
479 y = strappenda(x, "/tmp");
480
481 if (mkdir(y, 0777 | S_ISVTX) < 0)
482 return -errno;
c17ec25e 483 }
15ae422b 484
613b411c
LP
485 *path = x;
486 x = NULL;
487
488 return 0;
489}
490
/* Creates the pair of private temporary directories, one below /tmp
 * and one below /var/tmp.
 *
 * id:          identifier embedded in both directory names.
 * tmp_dir:     receives the allocated name of the directory in /tmp.
 * var_tmp_dir: receives the allocated name of the directory in /var/tmp.
 *
 * Either both directories are returned, or none: if the second one
 * cannot be created the first one is removed again.
 *
 * Returns 0 on success, a negative error value on failure. */
int setup_tmp_dirs(const char *id, char **tmp_dir, char **var_tmp_dir) {
        char *private_tmp, *private_var_tmp;
        char *inner;
        int r;

        assert(id);
        assert(tmp_dir);
        assert(var_tmp_dir);

        r = setup_one_tmp_dir(id, "/tmp", &private_tmp);
        if (r < 0)
                return r;

        r = setup_one_tmp_dir(id, "/var/tmp", &private_var_tmp);
        if (r >= 0) {
                *tmp_dir = private_tmp;
                *var_tmp_dir = private_var_tmp;

                return 0;
        }

        /* Roll back the first directory: inner "tmp" first, then the
         * directory itself */
        inner = strappenda(private_tmp, "/tmp");
        rmdir(inner);
        rmdir(private_tmp);

        free(private_tmp);
        return r;
}
520
521int setup_netns(int netns_storage_socket[2]) {
522 _cleanup_close_ int netns = -1;
523 union {
524 struct cmsghdr cmsghdr;
525 uint8_t buf[CMSG_SPACE(sizeof(int))];
526 } control = {};
527 struct msghdr mh = {
528 .msg_control = &control,
529 .msg_controllen = sizeof(control),
530 };
531 struct cmsghdr *cmsg;
532 int r;
533
534 assert(netns_storage_socket);
535 assert(netns_storage_socket[0] >= 0);
536 assert(netns_storage_socket[1] >= 0);
537
538 /* We use the passed socketpair as a storage buffer for our
76cd584b
LP
539 * namespace reference fd. Whatever process runs this first
540 * shall create a new namespace, all others should just join
541 * it. To serialize that we use a file lock on the socket
542 * pair.
613b411c
LP
543 *
544 * It's a bit crazy, but hey, works great! */
545
546 if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0)
547 return -errno;
548
549 if (recvmsg(netns_storage_socket[0], &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) < 0) {
550 if (errno != EAGAIN) {
551 r = -errno;
552 goto fail;
553 }
554
555 /* Nothing stored yet, so let's create a new namespace */
556
557 if (unshare(CLONE_NEWNET) < 0) {
558 r = -errno;
559 goto fail;
560 }
561
562 loopback_setup();
563
564 netns = open("/proc/self/ns/net", O_RDONLY|O_CLOEXEC|O_NOCTTY);
565 if (netns < 0) {
566 r = -errno;
567 goto fail;
568 }
569
570 r = 1;
571 } else {
572 /* Yay, found something, so let's join the namespace */
573
574 for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg)) {
575 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
576 assert(cmsg->cmsg_len == CMSG_LEN(sizeof(int)));
577 netns = *(int*) CMSG_DATA(cmsg);
578 }
579 }
580
581 if (setns(netns, CLONE_NEWNET) < 0) {
582 r = -errno;
583 goto fail;
584 }
585
586 r = 0;
587 }
588
589 cmsg = CMSG_FIRSTHDR(&mh);
590 cmsg->cmsg_level = SOL_SOCKET;
591 cmsg->cmsg_type = SCM_RIGHTS;
592 cmsg->cmsg_len = CMSG_LEN(sizeof(int));
593 memcpy(CMSG_DATA(cmsg), &netns, sizeof(int));
594 mh.msg_controllen = cmsg->cmsg_len;
595
596 if (sendmsg(netns_storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL) < 0) {
597 r = -errno;
598 goto fail;
599 }
600
601fail:
602 lockf(netns_storage_socket[0], F_ULOCK, 0);
603
15ae422b
LP
604 return r;
605}
417116f2
LP
606
607static const char *const protected_home_table[_PROTECTED_HOME_MAX] = {
608 [PROTECTED_HOME_NO] = "no",
609 [PROTECTED_HOME_YES] = "yes",
610 [PROTECTED_HOME_READ_ONLY] = "read-only",
611};
612
613DEFINE_STRING_TABLE_LOOKUP(protected_home, ProtectedHome);