]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/namespace.c
bus: properly handle if we get disconnected during HELLO phase
[thirdparty/systemd.git] / src / core / namespace.c
CommitLineData
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22#include <errno.h>
23#include <sys/mount.h>
24#include <string.h>
25#include <stdio.h>
26#include <unistd.h>
27#include <sys/stat.h>
28#include <sys/types.h>
29#include <sched.h>
30#include <sys/syscall.h>
31#include <limits.h>
25e870b5 32#include <linux/fs.h>
613b411c 33#include <sys/file.h>
15ae422b
LP
34
35#include "strv.h"
36#include "util.h"
9eb977db 37#include "path-util.h"
15ae422b
LP
38#include "namespace.h"
39#include "missing.h"
c17ec25e 40#include "execute.h"
613b411c 41#include "loopback-setup.h"
15ae422b 42
/* What is placed on top of a BindMount's path.
 *
 * The declaration order matters: mount_path_compare() orders entries
 * that name the same path by this value, and drop_duplicates() keeps
 * only the first of them, so an earlier enumerator overrides a later
 * one for the same path. */
typedef enum MountMode {
        /* This is ordered by priority! */
        INACCESSIBLE,    /* "/run/systemd/inaccessible" is bound over the path, then remounted read-only */
        READONLY,        /* the path is bound onto itself, then remounted read-only */
        PRIVATE_TMP,     /* the caller-supplied tmp_dir is bound over the path */
        PRIVATE_VAR_TMP, /* the caller-supplied var_tmp_dir is bound over the path */
        READWRITE        /* the path is bound onto itself and left writable */
} MountMode;
15ae422b 51
c17ec25e 52typedef struct BindMount {
15ae422b 53 const char *path;
c17ec25e 54 MountMode mode;
ac0930c8 55 bool done;
ea92ae33 56 bool ignore;
c17ec25e 57} BindMount;
15ae422b 58
c17ec25e 59static int append_mounts(BindMount **p, char **strv, MountMode mode) {
15ae422b
LP
60 char **i;
61
613b411c
LP
62 assert(p);
63
15ae422b
LP
64 STRV_FOREACH(i, strv) {
65
ea92ae33
MW
66 (*p)->ignore = false;
67
68 if ((mode == INACCESSIBLE || mode == READONLY) && (*i)[0] == '-') {
69 (*p)->ignore = true;
70 (*i)++;
71 }
72
15ae422b
LP
73 if (!path_is_absolute(*i))
74 return -EINVAL;
75
76 (*p)->path = *i;
77 (*p)->mode = mode;
78 (*p)++;
79 }
80
81 return 0;
82}
83
c17ec25e
MS
84static int mount_path_compare(const void *a, const void *b) {
85 const BindMount *p = a, *q = b;
15ae422b
LP
86
87 if (path_equal(p->path, q->path)) {
88
89 /* If the paths are equal, check the mode */
90 if (p->mode < q->mode)
91 return -1;
92
93 if (p->mode > q->mode)
94 return 1;
95
96 return 0;
97 }
98
99 /* If the paths are not equal, then order prefixes first */
100 if (path_startswith(p->path, q->path))
101 return 1;
102
103 if (path_startswith(q->path, p->path))
104 return -1;
105
106 return 0;
107}
108
c17ec25e
MS
109static void drop_duplicates(BindMount *m, unsigned *n) {
110 BindMount *f, *t, *previous;
15ae422b 111
c17ec25e 112 assert(m);
15ae422b 113 assert(n);
15ae422b 114
c17ec25e 115 for (f = m, t = m, previous = NULL; f < m+*n; f++) {
15ae422b 116
ac0930c8 117 /* The first one wins */
15ae422b
LP
118 if (previous && path_equal(f->path, previous->path))
119 continue;
120
121 t->path = f->path;
122 t->mode = f->mode;
123
15ae422b
LP
124 previous = t;
125
126 t++;
127 }
128
c17ec25e 129 *n = t - m;
15ae422b
LP
130}
131
ac0930c8 132static int apply_mount(
c17ec25e 133 BindMount *m,
ac0930c8 134 const char *tmp_dir,
c17ec25e 135 const char *var_tmp_dir) {
ac0930c8 136
15ae422b 137 const char *what;
15ae422b 138 int r;
15ae422b 139
c17ec25e 140 assert(m);
15ae422b 141
c17ec25e 142 switch (m->mode) {
15ae422b
LP
143
144 case INACCESSIBLE:
c17ec25e 145 what = "/run/systemd/inaccessible";
15ae422b
LP
146 break;
147
148 case READONLY:
15ae422b 149 case READWRITE:
c17ec25e 150 what = m->path;
15ae422b
LP
151 break;
152
ac0930c8
LP
153 case PRIVATE_TMP:
154 what = tmp_dir;
155 break;
156
157 case PRIVATE_VAR_TMP:
158 what = var_tmp_dir;
15ae422b 159 break;
e364ad06
LP
160
161 default:
162 assert_not_reached("Unknown mode");
15ae422b
LP
163 }
164
ac0930c8 165 assert(what);
15ae422b 166
c17ec25e 167 r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL);
ac0930c8 168 if (r >= 0)
c17ec25e 169 log_debug("Successfully mounted %s to %s", what, m->path);
ea92ae33
MW
170 else if (m->ignore && errno == ENOENT)
171 r = 0;
15ae422b 172
ac0930c8
LP
173 return r;
174}
15ae422b 175
c17ec25e 176static int make_read_only(BindMount *m) {
ac0930c8 177 int r;
15ae422b 178
c17ec25e 179 assert(m);
ac0930c8 180
c17ec25e 181 if (m->mode != INACCESSIBLE && m->mode != READONLY)
ac0930c8
LP
182 return 0;
183
c17ec25e 184 r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
ea92ae33 185 if (r < 0 && !(m->ignore && errno == ENOENT))
ac0930c8
LP
186 return -errno;
187
188 return 0;
15ae422b
LP
189}
190
613b411c
LP
191int setup_namespace(
192 char** read_write_dirs,
193 char** read_only_dirs,
194 char** inaccessible_dirs,
195 char* tmp_dir,
196 char* var_tmp_dir,
197 unsigned mount_flags) {
15ae422b 198
7ff7394d 199 BindMount *m, *mounts = NULL;
613b411c 200 unsigned n;
c17ec25e 201 int r = 0;
15ae422b 202
613b411c 203 if (mount_flags == 0)
c17ec25e 204 mount_flags = MS_SHARED;
ac0930c8 205
d5a3f0ea
ZJS
206 if (unshare(CLONE_NEWNS) < 0)
207 return -errno;
15ae422b 208
613b411c
LP
209 n = !!tmp_dir + !!var_tmp_dir +
210 strv_length(read_write_dirs) +
211 strv_length(read_only_dirs) +
212 strv_length(inaccessible_dirs);
213
214 if (n > 0) {
7ff7394d 215 m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
613b411c
LP
216 r = append_mounts(&m, read_write_dirs, READWRITE);
217 if (r < 0)
218 return r;
219
220 r = append_mounts(&m, read_only_dirs, READONLY);
221 if (r < 0)
222 return r;
223
224 r = append_mounts(&m, inaccessible_dirs, INACCESSIBLE);
225 if (r < 0)
7ff7394d
ZJS
226 return r;
227
613b411c 228 if (tmp_dir) {
7ff7394d
ZJS
229 m->path = "/tmp";
230 m->mode = PRIVATE_TMP;
231 m++;
613b411c 232 }
7ff7394d 233
613b411c 234 if (var_tmp_dir) {
7ff7394d
ZJS
235 m->path = "/var/tmp";
236 m->mode = PRIVATE_VAR_TMP;
237 m++;
238 }
ac0930c8 239
7ff7394d 240 assert(mounts + n == m);
ac0930c8 241
7ff7394d
ZJS
242 qsort(mounts, n, sizeof(BindMount), mount_path_compare);
243 drop_duplicates(mounts, &n);
15ae422b
LP
244 }
245
ac0930c8 246 /* Remount / as SLAVE so that nothing now mounted in the namespace
dc4b0200 247 shows up in the parent */
d5a3f0ea
ZJS
248 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
249 return -errno;
15ae422b 250
c17ec25e
MS
251 for (m = mounts; m < mounts + n; ++m) {
252 r = apply_mount(m, tmp_dir, var_tmp_dir);
c1d70f7c 253 if (r < 0)
613b411c 254 goto fail;
c1d70f7c 255 }
15ae422b 256
c17ec25e
MS
257 for (m = mounts; m < mounts + n; ++m) {
258 r = make_read_only(m);
ac0930c8 259 if (r < 0)
613b411c 260 goto fail;
15ae422b
LP
261 }
262
ac0930c8 263 /* Remount / as the desired mode */
c17ec25e 264 if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
15ae422b 265 r = -errno;
613b411c 266 goto fail;
15ae422b
LP
267 }
268
15ae422b
LP
269 return 0;
270
613b411c
LP
271fail:
272 for (m = mounts; m < mounts + n; ++m)
c17ec25e
MS
273 if (m->done)
274 umount2(m->path, MNT_DETACH);
613b411c
LP
275
276 return r;
277}
278
279static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {
280 _cleanup_free_ char *x = NULL;
281
282 assert(id);
283 assert(prefix);
284 assert(path);
285
286 x = strjoin(prefix, "/systemd-", id, "-XXXXXX", NULL);
287 if (!x)
288 return -ENOMEM;
289
290 RUN_WITH_UMASK(0077)
291 if (!mkdtemp(x))
292 return -errno;
293
294 RUN_WITH_UMASK(0000) {
295 char *y;
296
297 y = strappenda(x, "/tmp");
298
299 if (mkdir(y, 0777 | S_ISVTX) < 0)
300 return -errno;
c17ec25e 301 }
15ae422b 302
613b411c
LP
303 *path = x;
304 x = NULL;
305
306 return 0;
307}
308
/* Creates a private directory below "/tmp" and one below "/var/tmp"
 * for id, using setup_one_tmp_dir().
 *
 * On success returns 0 and stores the two newly allocated directory
 * names in *tmp_dir and *var_tmp_dir. If the second directory cannot
 * be created, the first one is removed again, and the negative error
 * code is returned without touching the output parameters. */
int setup_tmp_dirs(const char *id, char **tmp_dir, char **var_tmp_dir) {
        char *tmp_path, *var_tmp_path, *inner;
        int r;

        assert(id);
        assert(tmp_dir);
        assert(var_tmp_dir);

        r = setup_one_tmp_dir(id, "/tmp", &tmp_path);
        if (r < 0)
                return r;

        r = setup_one_tmp_dir(id, "/var/tmp", &var_tmp_path);
        if (r >= 0) {
                *tmp_dir = tmp_path;
                *var_tmp_dir = var_tmp_path;

                return 0;
        }

        /* Roll back the first directory: inner "tmp" first, then the
         * directory itself */
        inner = strappenda(tmp_path, "/tmp");
        rmdir(inner);
        rmdir(tmp_path);

        free(tmp_path);
        return r;
}
338
339int setup_netns(int netns_storage_socket[2]) {
340 _cleanup_close_ int netns = -1;
341 union {
342 struct cmsghdr cmsghdr;
343 uint8_t buf[CMSG_SPACE(sizeof(int))];
344 } control = {};
345 struct msghdr mh = {
346 .msg_control = &control,
347 .msg_controllen = sizeof(control),
348 };
349 struct cmsghdr *cmsg;
350 int r;
351
352 assert(netns_storage_socket);
353 assert(netns_storage_socket[0] >= 0);
354 assert(netns_storage_socket[1] >= 0);
355
356 /* We use the passed socketpair as a storage buffer for our
357 * namespace socket. Whatever process runs this first shall
358 * create a new namespace, all others should just join it. To
359 * serialize that we use a file lock on the socket pair.
360 *
361 * It's a bit crazy, but hey, works great! */
362
363 if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0)
364 return -errno;
365
366 if (recvmsg(netns_storage_socket[0], &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) < 0) {
367 if (errno != EAGAIN) {
368 r = -errno;
369 goto fail;
370 }
371
372 /* Nothing stored yet, so let's create a new namespace */
373
374 if (unshare(CLONE_NEWNET) < 0) {
375 r = -errno;
376 goto fail;
377 }
378
379 loopback_setup();
380
381 netns = open("/proc/self/ns/net", O_RDONLY|O_CLOEXEC|O_NOCTTY);
382 if (netns < 0) {
383 r = -errno;
384 goto fail;
385 }
386
387 r = 1;
388 } else {
389 /* Yay, found something, so let's join the namespace */
390
391 for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg)) {
392 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
393 assert(cmsg->cmsg_len == CMSG_LEN(sizeof(int)));
394 netns = *(int*) CMSG_DATA(cmsg);
395 }
396 }
397
398 if (setns(netns, CLONE_NEWNET) < 0) {
399 r = -errno;
400 goto fail;
401 }
402
403 r = 0;
404 }
405
406 cmsg = CMSG_FIRSTHDR(&mh);
407 cmsg->cmsg_level = SOL_SOCKET;
408 cmsg->cmsg_type = SCM_RIGHTS;
409 cmsg->cmsg_len = CMSG_LEN(sizeof(int));
410 memcpy(CMSG_DATA(cmsg), &netns, sizeof(int));
411 mh.msg_controllen = cmsg->cmsg_len;
412
413 if (sendmsg(netns_storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL) < 0) {
414 r = -errno;
415 goto fail;
416 }
417
418fail:
419 lockf(netns_storage_socket[0], F_ULOCK, 0);
420
15ae422b
LP
421 return r;
422}