]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/namespace.c
namespace: add some debug logging when enforcing InaccessiblePaths=
[thirdparty/systemd.git] / src / core / namespace.c
CommitLineData
15ae422b
LP
1/***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
15ae422b
LP
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 14 Lesser General Public License for more details.
15ae422b 15
5430f7f2 16 You should have received a copy of the GNU Lesser General Public License
15ae422b
LP
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
20#include <errno.h>
07630cea 21#include <sched.h>
15ae422b 22#include <stdio.h>
07630cea
LP
23#include <string.h>
24#include <sys/mount.h>
15ae422b 25#include <sys/stat.h>
07630cea 26#include <unistd.h>
25e870b5 27#include <linux/fs.h>
15ae422b 28
b5efdb8a 29#include "alloc-util.h"
7f112f50 30#include "dev-setup.h"
3ffd4af2 31#include "fd-util.h"
07630cea
LP
32#include "loopback-setup.h"
33#include "missing.h"
34#include "mkdir.h"
4349cd7c 35#include "mount-util.h"
3ffd4af2 36#include "namespace.h"
07630cea 37#include "path-util.h"
d7b8eec7 38#include "selinux-util.h"
2583fbea 39#include "socket-util.h"
8b43440b 40#include "string-table.h"
07630cea
LP
41#include "string-util.h"
42#include "strv.h"
affb60b1 43#include "umask-util.h"
ee104e11 44#include "user-util.h"
07630cea 45#include "util.h"
15ae422b 46
/* Mount flags shared by the initial tmpfs mount of the private /dev and its later read-only remount. */
#define DEV_MOUNT_OPTIONS (MS_NOEXEC | MS_STRICTATIME | MS_NOSUID)
48
/* What kind of mount to establish on a path.
 *
 * The numeric order matters: mount_path_compare() sorts entries that refer to the same path by ascending
 * mode value, and drop_duplicates() keeps the first one, so lower values take priority. */
typedef enum MountMode {
        INACCESSIBLE = 0,       /* overmounted with an inaccessible node */
        READONLY,               /* bind mount, later remounted read-only */
        PRIVATE_TMP,            /* private directory mounted over /tmp */
        PRIVATE_VAR_TMP,        /* private directory mounted over /var/tmp */
        PRIVATE_DEV,            /* freshly populated private /dev */
        READWRITE,              /* bind mount that is left writable */
} MountMode;
15ae422b 58
c17ec25e 59typedef struct BindMount {
15ae422b 60 const char *path;
c17ec25e 61 MountMode mode;
ac0930c8 62 bool done;
ea92ae33 63 bool ignore;
c17ec25e 64} BindMount;
15ae422b 65
c17ec25e 66static int append_mounts(BindMount **p, char **strv, MountMode mode) {
15ae422b
LP
67 char **i;
68
613b411c
LP
69 assert(p);
70
15ae422b
LP
71 STRV_FOREACH(i, strv) {
72
ea92ae33 73 (*p)->ignore = false;
002b2268 74 (*p)->done = false;
ea92ae33 75
94828d2d 76 if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') {
ea92ae33
MW
77 (*p)->ignore = true;
78 (*i)++;
79 }
80
15ae422b
LP
81 if (!path_is_absolute(*i))
82 return -EINVAL;
83
84 (*p)->path = *i;
85 (*p)->mode = mode;
86 (*p)++;
87 }
88
89 return 0;
90}
91
c17ec25e
MS
92static int mount_path_compare(const void *a, const void *b) {
93 const BindMount *p = a, *q = b;
a0827e2b 94 int d;
15ae422b 95
6ee1a919 96 /* If the paths are not equal, then order prefixes first */
a0827e2b 97 d = path_compare(p->path, q->path);
6ee1a919
LP
98 if (d != 0)
99 return d;
15ae422b 100
6ee1a919
LP
101 /* If the paths are equal, check the mode */
102 if (p->mode < q->mode)
103 return -1;
15ae422b 104
6ee1a919
LP
105 if (p->mode > q->mode)
106 return 1;
15ae422b 107
6ee1a919 108 return 0;
15ae422b
LP
109}
110
c17ec25e
MS
111static void drop_duplicates(BindMount *m, unsigned *n) {
112 BindMount *f, *t, *previous;
15ae422b 113
c17ec25e 114 assert(m);
15ae422b 115 assert(n);
15ae422b 116
fe3c2583
LP
117 /* Drops duplicate entries. Expects that the array is properly ordered already. */
118
c17ec25e 119 for (f = m, t = m, previous = NULL; f < m+*n; f++) {
15ae422b 120
fe3c2583
LP
121 /* The first one wins (which is the one with the more restrictive mode), see mount_path_compare()
122 * above. */
123 if (previous && path_equal(f->path, previous->path)) {
124 log_debug("%s is duplicate.", f->path);
15ae422b 125 continue;
fe3c2583 126 }
15ae422b 127
e2d7c1a0 128 *t = *f;
15ae422b 129 previous = t;
fe3c2583
LP
130 t++;
131 }
132
133 *n = t - m;
134}
135
136static void drop_inaccessible(BindMount *m, unsigned *n) {
137 BindMount *f, *t;
138 const char *clear = NULL;
139
140 assert(m);
141 assert(n);
142
143 /* Drops all entries obstructed by another entry further up the tree. Expects that the array is properly
144 * ordered already. */
145
146 for (f = m, t = m; f < m+*n; f++) {
147
148 /* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop
149 * it, as inaccessible paths really should drop the entire subtree. */
150 if (clear && path_startswith(f->path, clear)) {
151 log_debug("%s is masked by %s.", f->path, clear);
152 continue;
153 }
15ae422b 154
fe3c2583
LP
155 clear = f->mode == INACCESSIBLE ? f->path : NULL;
156
157 *t = *f;
15ae422b
LP
158 t++;
159 }
160
c17ec25e 161 *n = t - m;
15ae422b
LP
162}
163
7648a565
LP
164static void drop_nop(BindMount *m, unsigned *n) {
165 BindMount *f, *t;
166
167 assert(m);
168 assert(n);
169
170 /* Drops all entries which have an immediate parent that has the same type, as they are redundant. Assumes the
171 * list is ordered by prefixes. */
172
173 for (f = m, t = m; f < m+*n; f++) {
174
175 /* Only suppress such subtrees for READONLY and READWRITE entries */
176 if (IN_SET(f->mode, READONLY, READWRITE)) {
177 BindMount *p;
178 bool found = false;
179
180 /* Now let's find the first parent of the entry we are looking at. */
181 for (p = t-1; p >= m; p--) {
182 if (path_startswith(f->path, p->path)) {
183 found = true;
184 break;
185 }
186 }
187
188 /* We found it, let's see if it's the same mode, if so, we can drop this entry */
189 if (found && p->mode == f->mode) {
190 log_debug("%s is redundant by %s", f->path, p->path);
191 continue;
192 }
193 }
194
195 *t = *f;
196 t++;
197 }
198
199 *n = t - m;
200}
201
7f112f50
LP
202static int mount_dev(BindMount *m) {
203 static const char devnodes[] =
204 "/dev/null\0"
205 "/dev/zero\0"
206 "/dev/full\0"
207 "/dev/random\0"
208 "/dev/urandom\0"
209 "/dev/tty\0";
210
2b85f4e1 211 char temporary_mount[] = "/tmp/namespace-dev-XXXXXX";
63cc4c31 212 const char *d, *dev = NULL, *devpts = NULL, *devshm = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL;
7f112f50
LP
213 _cleanup_umask_ mode_t u;
214 int r;
215
216 assert(m);
217
218 u = umask(0000);
219
2b85f4e1
LP
220 if (!mkdtemp(temporary_mount))
221 return -errno;
222
63c372cb 223 dev = strjoina(temporary_mount, "/dev");
dc751688 224 (void) mkdir(dev, 0755);
737ba3c8 225 if (mount("tmpfs", dev, "tmpfs", DEV_MOUNT_OPTIONS, "mode=755") < 0) {
2b85f4e1
LP
226 r = -errno;
227 goto fail;
228 }
229
63c372cb 230 devpts = strjoina(temporary_mount, "/dev/pts");
dc751688 231 (void) mkdir(devpts, 0755);
2b85f4e1
LP
232 if (mount("/dev/pts", devpts, NULL, MS_BIND, NULL) < 0) {
233 r = -errno;
234 goto fail;
235 }
236
63c372cb 237 devptmx = strjoina(temporary_mount, "/dev/ptmx");
3164e3cb
ZJS
238 if (symlink("pts/ptmx", devptmx) < 0) {
239 r = -errno;
240 goto fail;
241 }
e06b6479 242
63c372cb 243 devshm = strjoina(temporary_mount, "/dev/shm");
dc751688 244 (void) mkdir(devshm, 01777);
2b85f4e1
LP
245 r = mount("/dev/shm", devshm, NULL, MS_BIND, NULL);
246 if (r < 0) {
247 r = -errno;
248 goto fail;
249 }
250
63c372cb 251 devmqueue = strjoina(temporary_mount, "/dev/mqueue");
dc751688 252 (void) mkdir(devmqueue, 0755);
3164e3cb 253 (void) mount("/dev/mqueue", devmqueue, NULL, MS_BIND, NULL);
2b85f4e1 254
63c372cb 255 devhugepages = strjoina(temporary_mount, "/dev/hugepages");
dc751688 256 (void) mkdir(devhugepages, 0755);
3164e3cb 257 (void) mount("/dev/hugepages", devhugepages, NULL, MS_BIND, NULL);
2b85f4e1 258
63c372cb 259 devlog = strjoina(temporary_mount, "/dev/log");
3164e3cb 260 (void) symlink("/run/systemd/journal/dev-log", devlog);
82d25240 261
7f112f50 262 NULSTR_FOREACH(d, devnodes) {
2b85f4e1
LP
263 _cleanup_free_ char *dn = NULL;
264 struct stat st;
265
266 r = stat(d, &st);
7f112f50 267 if (r < 0) {
2b85f4e1
LP
268
269 if (errno == ENOENT)
270 continue;
271
272 r = -errno;
273 goto fail;
7f112f50
LP
274 }
275
2b85f4e1
LP
276 if (!S_ISBLK(st.st_mode) &&
277 !S_ISCHR(st.st_mode)) {
278 r = -EINVAL;
279 goto fail;
280 }
281
282 if (st.st_rdev == 0)
283 continue;
284
285 dn = strappend(temporary_mount, d);
286 if (!dn) {
287 r = -ENOMEM;
288 goto fail;
289 }
290
ecabcf8b 291 mac_selinux_create_file_prepare(d, st.st_mode);
2b85f4e1 292 r = mknod(dn, st.st_mode, st.st_rdev);
ecabcf8b 293 mac_selinux_create_file_clear();
dd078a1e 294
2b85f4e1
LP
295 if (r < 0) {
296 r = -errno;
297 goto fail;
298 }
7f112f50
LP
299 }
300
03cfe0d5 301 dev_setup(temporary_mount, UID_INVALID, GID_INVALID);
7f112f50 302
ee818b89
AC
303 /* Create the /dev directory if missing. It is more likely to be
304 * missing when the service is started with RootDirectory. This is
305 * consistent with mount units creating the mount points when missing.
306 */
307 (void) mkdir_p_label(m->path, 0755);
308
9e5f8252 309 /* Unmount everything in old /dev */
310 umount_recursive(m->path, 0);
ee818b89 311 if (mount(dev, m->path, NULL, MS_MOVE, NULL) < 0) {
2b85f4e1
LP
312 r = -errno;
313 goto fail;
314 }
7f112f50 315
2b85f4e1
LP
316 rmdir(dev);
317 rmdir(temporary_mount);
7f112f50 318
2b85f4e1 319 return 0;
7f112f50 320
2b85f4e1
LP
321fail:
322 if (devpts)
323 umount(devpts);
7f112f50 324
2b85f4e1
LP
325 if (devshm)
326 umount(devshm);
7f112f50 327
2b85f4e1
LP
328 if (devhugepages)
329 umount(devhugepages);
7f112f50 330
2b85f4e1
LP
331 if (devmqueue)
332 umount(devmqueue);
7f112f50 333
d267c5aa
ZJS
334 umount(dev);
335 rmdir(dev);
2b85f4e1 336 rmdir(temporary_mount);
7f112f50 337
2b85f4e1 338 return r;
7f112f50
LP
339}
340
ac0930c8 341static int apply_mount(
c17ec25e 342 BindMount *m,
ac0930c8 343 const char *tmp_dir,
c17ec25e 344 const char *var_tmp_dir) {
ac0930c8 345
15ae422b 346 const char *what;
15ae422b 347 int r;
15ae422b 348
c17ec25e 349 assert(m);
15ae422b 350
fe3c2583
LP
351 log_debug("Applying namespace mount on %s", m->path);
352
c17ec25e 353 switch (m->mode) {
15ae422b 354
160cfdbe
LP
355 case INACCESSIBLE: {
356 struct stat target;
6d313367
LP
357
358 /* First, get rid of everything that is below if there
359 * is anything... Then, overmount it with an
c4b41707 360 * inaccessible path. */
fe3c2583 361 (void) umount_recursive(m->path, 0);
6d313367 362
5fd7cf6f 363 if (lstat(m->path, &target) < 0) {
c4b41707
AP
364 if (m->ignore && errno == ENOENT)
365 return 0;
160cfdbe 366 return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", m->path);
c4b41707 367 }
15ae422b 368
c4b41707 369 what = mode_to_inaccessible_node(target.st_mode);
5fd7cf6f
LP
370 if (!what) {
371 log_debug("File type not supported for inaccessible mounts. Note that symlinks are not allowed");
c4b41707
AP
372 return -ELOOP;
373 }
374 break;
160cfdbe 375 }
fe3c2583 376
15ae422b 377 case READONLY:
15ae422b 378 case READWRITE:
6b7c9f8b
LP
379
380 r = path_is_mount_point(m->path, 0);
381 if (r < 0) {
382 if (m->ignore && errno == ENOENT)
383 return 0;
384 return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", m->path);
385 }
386 if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */
387 return 0;
388
389 /* This isn't a mount point yet, let's make it one. */
390 what = m->path;
391 break;
15ae422b 392
ac0930c8
LP
393 case PRIVATE_TMP:
394 what = tmp_dir;
395 break;
396
397 case PRIVATE_VAR_TMP:
398 what = var_tmp_dir;
15ae422b 399 break;
e364ad06 400
d6797c92
LP
401 case PRIVATE_DEV:
402 return mount_dev(m);
403
e364ad06
LP
404 default:
405 assert_not_reached("Unknown mode");
15ae422b
LP
406 }
407
ac0930c8 408 assert(what);
15ae422b 409
6b7c9f8b 410 if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0) {
c4b41707
AP
411 if (m->ignore && errno == ENOENT)
412 return 0;
6b7c9f8b 413
5fd7cf6f 414 return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path);
c4b41707 415 }
6b7c9f8b
LP
416
417 log_debug("Successfully mounted %s to %s", what, m->path);
418 return 0;
ac0930c8 419}
15ae422b 420
6b7c9f8b
LP
421static int make_read_only(BindMount *m, char **blacklist) {
422 int r = 0;
15ae422b 423
c17ec25e 424 assert(m);
ac0930c8 425
d6797c92 426 if (IN_SET(m->mode, INACCESSIBLE, READONLY))
6b7c9f8b
LP
427 r = bind_remount_recursive(m->path, true, blacklist);
428 else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/
429 if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
430 r = -errno;
737ba3c8 431 } else
6b7c9f8b
LP
432 return 0;
433
434 /* Not that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked read-only
435 * already stays this way. This improves compatibility with container managers, where we won't attempt to undo
436 * read-only mounts already applied. */
ac0930c8 437
d6797c92
LP
438 if (m->ignore && r == -ENOENT)
439 return 0;
ac0930c8 440
d6797c92 441 return r;
15ae422b
LP
442}
443
613b411c 444int setup_namespace(
ee818b89 445 const char* root_directory,
2a624c36
AP
446 char** read_write_paths,
447 char** read_only_paths,
448 char** inaccessible_paths,
a004cb4c
LP
449 const char* tmp_dir,
450 const char* var_tmp_dir,
7f112f50 451 bool private_dev,
59eeb84b
LP
452 bool protect_sysctl,
453 bool protect_cgroups,
1b8689f9
LP
454 ProtectHome protect_home,
455 ProtectSystem protect_system,
e6547662 456 unsigned long mount_flags) {
15ae422b 457
7ff7394d 458 BindMount *m, *mounts = NULL;
613b411c 459 unsigned n;
c17ec25e 460 int r = 0;
15ae422b 461
613b411c 462 if (mount_flags == 0)
c17ec25e 463 mount_flags = MS_SHARED;
ac0930c8 464
d5a3f0ea
ZJS
465 if (unshare(CLONE_NEWNS) < 0)
466 return -errno;
15ae422b 467
9ca6ff50 468 n = !!tmp_dir + !!var_tmp_dir +
2a624c36
AP
469 strv_length(read_write_paths) +
470 strv_length(read_only_paths) +
471 strv_length(inaccessible_paths) +
417116f2 472 private_dev +
59eeb84b
LP
473 (protect_sysctl ? 3 : 0) +
474 (protect_cgroups != protect_sysctl) +
c8835999 475 (protect_home != PROTECT_HOME_NO ? 3 : 0) +
051be1f7 476 (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) +
1b8689f9 477 (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0);
613b411c
LP
478
479 if (n > 0) {
002b2268 480 m = mounts = (BindMount *) alloca0(n * sizeof(BindMount));
2a624c36 481 r = append_mounts(&m, read_write_paths, READWRITE);
613b411c
LP
482 if (r < 0)
483 return r;
484
2a624c36 485 r = append_mounts(&m, read_only_paths, READONLY);
613b411c
LP
486 if (r < 0)
487 return r;
488
2a624c36 489 r = append_mounts(&m, inaccessible_paths, INACCESSIBLE);
613b411c 490 if (r < 0)
7ff7394d
ZJS
491 return r;
492
613b411c 493 if (tmp_dir) {
ee818b89 494 m->path = prefix_roota(root_directory, "/tmp");
7ff7394d
ZJS
495 m->mode = PRIVATE_TMP;
496 m++;
613b411c 497 }
7ff7394d 498
613b411c 499 if (var_tmp_dir) {
ee818b89 500 m->path = prefix_roota(root_directory, "/var/tmp");
7ff7394d
ZJS
501 m->mode = PRIVATE_VAR_TMP;
502 m++;
503 }
ac0930c8 504
7f112f50 505 if (private_dev) {
ee818b89 506 m->path = prefix_roota(root_directory, "/dev");
7f112f50
LP
507 m->mode = PRIVATE_DEV;
508 m++;
509 }
510
59eeb84b
LP
511 if (protect_sysctl) {
512 m->path = prefix_roota(root_directory, "/proc/sys");
513 m->mode = READONLY;
514 m++;
515
516 m->path = prefix_roota(root_directory, "/proc/sysrq-trigger");
517 m->mode = READONLY;
518 m->ignore = true; /* Not always compiled into the kernel */
519 m++;
520
521 m->path = prefix_roota(root_directory, "/sys");
522 m->mode = READONLY;
523 m++;
524 }
525
526 if (protect_cgroups != protect_sysctl) {
527 m->path = prefix_roota(root_directory, "/sys/fs/cgroup");
528 m->mode = protect_cgroups ? READONLY : READWRITE;
529 m++;
530 }
531
1b8689f9 532 if (protect_home != PROTECT_HOME_NO) {
ee818b89
AC
533 const char *home_dir, *run_user_dir, *root_dir;
534
535 home_dir = prefix_roota(root_directory, "/home");
536 home_dir = strjoina("-", home_dir);
537 run_user_dir = prefix_roota(root_directory, "/run/user");
538 run_user_dir = strjoina("-", run_user_dir);
539 root_dir = prefix_roota(root_directory, "/root");
540 root_dir = strjoina("-", root_dir);
541
542 r = append_mounts(&m, STRV_MAKE(home_dir, run_user_dir, root_dir),
543 protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
417116f2
LP
544 if (r < 0)
545 return r;
546 }
547
1b8689f9 548 if (protect_system != PROTECT_SYSTEM_NO) {
ee818b89
AC
549 const char *usr_dir, *boot_dir, *etc_dir;
550
d38e01dc 551 usr_dir = prefix_roota(root_directory, "/usr");
ee818b89
AC
552 boot_dir = prefix_roota(root_directory, "/boot");
553 boot_dir = strjoina("-", boot_dir);
554 etc_dir = prefix_roota(root_directory, "/etc");
555
556 r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL
557 ? STRV_MAKE(usr_dir, boot_dir, etc_dir)
558 : STRV_MAKE(usr_dir, boot_dir), READONLY);
417116f2
LP
559 if (r < 0)
560 return r;
561 }
562
7ff7394d 563 assert(mounts + n == m);
ac0930c8 564
7ff7394d 565 qsort(mounts, n, sizeof(BindMount), mount_path_compare);
fe3c2583 566
7ff7394d 567 drop_duplicates(mounts, &n);
fe3c2583 568 drop_inaccessible(mounts, &n);
7648a565 569 drop_nop(mounts, &n);
15ae422b
LP
570 }
571
ee818b89 572 if (n > 0 || root_directory) {
c2c13f2d
LP
573 /* Remount / as SLAVE so that nothing now mounted in the namespace
574 shows up in the parent */
575 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
576 return -errno;
ee818b89
AC
577 }
578
579 if (root_directory) {
580 /* Turn directory into bind mount */
581 if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0)
582 return -errno;
583 }
c2c13f2d 584
ee818b89 585 if (n > 0) {
6b7c9f8b
LP
586 char **blacklist;
587 unsigned j;
588
589 /* First round, add in all special mounts we need */
c2c13f2d
LP
590 for (m = mounts; m < mounts + n; ++m) {
591 r = apply_mount(m, tmp_dir, var_tmp_dir);
592 if (r < 0)
593 goto fail;
594 }
15ae422b 595
6b7c9f8b
LP
596 /* Create a blacklist we can pass to bind_mount_recursive() */
597 blacklist = newa(char*, n+1);
598 for (j = 0; j < n; j++)
599 blacklist[j] = (char*) mounts[j].path;
600 blacklist[j] = NULL;
601
602 /* Second round, flip the ro bits if necessary. */
c2c13f2d 603 for (m = mounts; m < mounts + n; ++m) {
6b7c9f8b 604 r = make_read_only(m, blacklist);
c2c13f2d
LP
605 if (r < 0)
606 goto fail;
607 }
15ae422b
LP
608 }
609
ee818b89
AC
610 if (root_directory) {
611 /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */
612 r = mount_move_root(root_directory);
6b7c9f8b 613 if (r < 0) /* at this point, we cannot rollback */
ee818b89
AC
614 return r;
615 }
616
c2c13f2d
LP
617 /* Remount / as the desired mode. Not that this will not
618 * reestablish propagation from our side to the host, since
619 * what's disconnected is disconnected. */
1f6b4113 620 if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0)
6b7c9f8b 621 return -errno; /* at this point, we cannot rollback */
15ae422b 622
15ae422b
LP
623 return 0;
624
613b411c 625fail:
c2c13f2d 626 if (n > 0) {
59eeb84b
LP
627 for (m = mounts; m < mounts + n; ++m) {
628 if (!m->done)
629 continue;
630
631 (void) umount2(m->path, MNT_DETACH);
632 }
c2c13f2d 633 }
613b411c
LP
634
635 return r;
636}
637
638static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {
639 _cleanup_free_ char *x = NULL;
6b46ea73
LP
640 char bid[SD_ID128_STRING_MAX];
641 sd_id128_t boot_id;
642 int r;
613b411c
LP
643
644 assert(id);
645 assert(prefix);
646 assert(path);
647
6b46ea73
LP
648 /* We include the boot id in the directory so that after a
649 * reboot we can easily identify obsolete directories. */
650
651 r = sd_id128_get_boot(&boot_id);
652 if (r < 0)
653 return r;
654
655 x = strjoin(prefix, "/systemd-private-", sd_id128_to_string(boot_id, bid), "-", id, "-XXXXXX", NULL);
613b411c
LP
656 if (!x)
657 return -ENOMEM;
658
659 RUN_WITH_UMASK(0077)
660 if (!mkdtemp(x))
661 return -errno;
662
663 RUN_WITH_UMASK(0000) {
664 char *y;
665
63c372cb 666 y = strjoina(x, "/tmp");
613b411c
LP
667
668 if (mkdir(y, 0777 | S_ISVTX) < 0)
669 return -errno;
c17ec25e 670 }
15ae422b 671
613b411c
LP
672 *path = x;
673 x = NULL;
674
675 return 0;
676}
677
/* Creates the pair of private temporary directories for id, one below /tmp and one below /var/tmp.
 *
 * On success returns 0 and stores the two newly allocated paths in *tmp_dir and *var_tmp_dir. On failure
 * returns a negative errno-style value and stores nothing; if only the second directory could not be
 * created, the first one is removed again. */
int setup_tmp_dirs(const char *id, char **tmp_dir, char **var_tmp_dir) {
        char *private_tmp, *private_var_tmp;
        int r;

        assert(id);
        assert(tmp_dir);
        assert(var_tmp_dir);

        r = setup_one_tmp_dir(id, "/tmp", &private_tmp);
        if (r < 0)
                return r;

        r = setup_one_tmp_dir(id, "/var/tmp", &private_var_tmp);
        if (r >= 0) {
                *tmp_dir = private_tmp;
                *var_tmp_dir = private_var_tmp;
                return 0;
        }

        /* Undo the first directory: the inner "tmp" first, then the outer one */
        {
                char *inner;

                inner = strjoina(private_tmp, "/tmp");
                rmdir(inner);
                rmdir(private_tmp);
        }

        free(private_tmp);
        return r;
}
707
708int setup_netns(int netns_storage_socket[2]) {
709 _cleanup_close_ int netns = -1;
3ee897d6 710 int r, q;
613b411c
LP
711
712 assert(netns_storage_socket);
713 assert(netns_storage_socket[0] >= 0);
714 assert(netns_storage_socket[1] >= 0);
715
716 /* We use the passed socketpair as a storage buffer for our
76cd584b
LP
717 * namespace reference fd. Whatever process runs this first
718 * shall create a new namespace, all others should just join
719 * it. To serialize that we use a file lock on the socket
720 * pair.
613b411c
LP
721 *
722 * It's a bit crazy, but hey, works great! */
723
724 if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0)
725 return -errno;
726
3ee897d6
LP
727 netns = receive_one_fd(netns_storage_socket[0], MSG_DONTWAIT);
728 if (netns == -EAGAIN) {
613b411c
LP
729 /* Nothing stored yet, so let's create a new namespace */
730
731 if (unshare(CLONE_NEWNET) < 0) {
732 r = -errno;
733 goto fail;
734 }
735
736 loopback_setup();
737
738 netns = open("/proc/self/ns/net", O_RDONLY|O_CLOEXEC|O_NOCTTY);
739 if (netns < 0) {
740 r = -errno;
741 goto fail;
742 }
743
744 r = 1;
613b411c 745
3ee897d6
LP
746 } else if (netns < 0) {
747 r = netns;
748 goto fail;
613b411c 749
3ee897d6
LP
750 } else {
751 /* Yay, found something, so let's join the namespace */
613b411c
LP
752 if (setns(netns, CLONE_NEWNET) < 0) {
753 r = -errno;
754 goto fail;
755 }
756
757 r = 0;
758 }
759
3ee897d6
LP
760 q = send_one_fd(netns_storage_socket[1], netns, MSG_DONTWAIT);
761 if (q < 0) {
762 r = q;
613b411c
LP
763 goto fail;
764 }
765
766fail:
fe048ce5 767 (void) lockf(netns_storage_socket[0], F_ULOCK, 0);
15ae422b
LP
768 return r;
769}
417116f2 770
1b8689f9
LP
771static const char *const protect_home_table[_PROTECT_HOME_MAX] = {
772 [PROTECT_HOME_NO] = "no",
773 [PROTECT_HOME_YES] = "yes",
774 [PROTECT_HOME_READ_ONLY] = "read-only",
417116f2
LP
775};
776
1b8689f9
LP
777DEFINE_STRING_TABLE_LOOKUP(protect_home, ProtectHome);
778
779static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = {
780 [PROTECT_SYSTEM_NO] = "no",
781 [PROTECT_SYSTEM_YES] = "yes",
782 [PROTECT_SYSTEM_FULL] = "full",
783};
784
785DEFINE_STRING_TABLE_LOOKUP(protect_system, ProtectSystem);