]>
Commit | Line | Data |
---|---|---|
d6c9574f | 1 | /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ |
15ae422b LP |
2 | |
3 | /*** | |
4 | This file is part of systemd. | |
5 | ||
6 | Copyright 2010 Lennart Poettering | |
7 | ||
8 | systemd is free software; you can redistribute it and/or modify it | |
5430f7f2 LP |
9 | under the terms of the GNU Lesser General Public License as published by |
10 | the Free Software Foundation; either version 2.1 of the License, or | |
15ae422b LP |
11 | (at your option) any later version. |
12 | ||
13 | systemd is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
5430f7f2 | 16 | Lesser General Public License for more details. |
15ae422b | 17 | |
5430f7f2 | 18 | You should have received a copy of the GNU Lesser General Public License |
15ae422b LP |
19 | along with systemd; If not, see <http://www.gnu.org/licenses/>. |
20 | ***/ | |
21 | ||
22 | #include <errno.h> | |
23 | #include <sys/mount.h> | |
24 | #include <string.h> | |
25 | #include <stdio.h> | |
26 | #include <unistd.h> | |
27 | #include <sys/stat.h> | |
28 | #include <sys/types.h> | |
29 | #include <sched.h> | |
30 | #include <sys/syscall.h> | |
31 | #include <limits.h> | |
25e870b5 | 32 | #include <linux/fs.h> |
613b411c | 33 | #include <sys/file.h> |
15ae422b LP |
34 | |
35 | #include "strv.h" | |
36 | #include "util.h" | |
9eb977db | 37 | #include "path-util.h" |
15ae422b LP |
38 | #include "namespace.h" |
39 | #include "missing.h" | |
c17ec25e | 40 | #include "execute.h" |
613b411c | 41 | #include "loopback-setup.h" |
15ae422b | 42 | |
/* How a single path is treated inside the new mount namespace.
 *
 * The declaration order is significant: when the same path is listed
 * with several modes, mount_path_compare() sorts the entry with the
 * numerically smallest mode first and drop_duplicates() keeps only
 * that first entry. */
typedef enum MountMode {
        /* "/run/systemd/inaccessible" is bind mounted over the path,
         * which is then remounted read-only */
        INACCESSIBLE = 0,

        /* The path is bind mounted onto itself and remounted read-only */
        READONLY,

        /* The caller-supplied private directory is mounted over /tmp */
        PRIVATE_TMP,

        /* The caller-supplied private directory is mounted over /var/tmp */
        PRIVATE_VAR_TMP,

        /* The path is bind mounted onto itself and left writable */
        READWRITE
} MountMode;
15ae422b | 51 | |
c17ec25e | 52 | typedef struct BindMount { |
15ae422b | 53 | const char *path; |
c17ec25e | 54 | MountMode mode; |
ac0930c8 | 55 | bool done; |
ea92ae33 | 56 | bool ignore; |
c17ec25e | 57 | } BindMount; |
15ae422b | 58 | |
c17ec25e | 59 | static int append_mounts(BindMount **p, char **strv, MountMode mode) { |
15ae422b LP |
60 | char **i; |
61 | ||
613b411c LP |
62 | assert(p); |
63 | ||
15ae422b LP |
64 | STRV_FOREACH(i, strv) { |
65 | ||
ea92ae33 MW |
66 | (*p)->ignore = false; |
67 | ||
68 | if ((mode == INACCESSIBLE || mode == READONLY) && (*i)[0] == '-') { | |
69 | (*p)->ignore = true; | |
70 | (*i)++; | |
71 | } | |
72 | ||
15ae422b LP |
73 | if (!path_is_absolute(*i)) |
74 | return -EINVAL; | |
75 | ||
76 | (*p)->path = *i; | |
77 | (*p)->mode = mode; | |
78 | (*p)++; | |
79 | } | |
80 | ||
81 | return 0; | |
82 | } | |
83 | ||
c17ec25e MS |
84 | static int mount_path_compare(const void *a, const void *b) { |
85 | const BindMount *p = a, *q = b; | |
15ae422b LP |
86 | |
87 | if (path_equal(p->path, q->path)) { | |
88 | ||
89 | /* If the paths are equal, check the mode */ | |
90 | if (p->mode < q->mode) | |
91 | return -1; | |
92 | ||
93 | if (p->mode > q->mode) | |
94 | return 1; | |
95 | ||
96 | return 0; | |
97 | } | |
98 | ||
99 | /* If the paths are not equal, then order prefixes first */ | |
100 | if (path_startswith(p->path, q->path)) | |
101 | return 1; | |
102 | ||
103 | if (path_startswith(q->path, p->path)) | |
104 | return -1; | |
105 | ||
106 | return 0; | |
107 | } | |
108 | ||
c17ec25e MS |
109 | static void drop_duplicates(BindMount *m, unsigned *n) { |
110 | BindMount *f, *t, *previous; | |
15ae422b | 111 | |
c17ec25e | 112 | assert(m); |
15ae422b | 113 | assert(n); |
15ae422b | 114 | |
c17ec25e | 115 | for (f = m, t = m, previous = NULL; f < m+*n; f++) { |
15ae422b | 116 | |
ac0930c8 | 117 | /* The first one wins */ |
15ae422b LP |
118 | if (previous && path_equal(f->path, previous->path)) |
119 | continue; | |
120 | ||
121 | t->path = f->path; | |
122 | t->mode = f->mode; | |
123 | ||
15ae422b LP |
124 | previous = t; |
125 | ||
126 | t++; | |
127 | } | |
128 | ||
c17ec25e | 129 | *n = t - m; |
15ae422b LP |
130 | } |
131 | ||
ac0930c8 | 132 | static int apply_mount( |
c17ec25e | 133 | BindMount *m, |
ac0930c8 | 134 | const char *tmp_dir, |
c17ec25e | 135 | const char *var_tmp_dir) { |
ac0930c8 | 136 | |
15ae422b | 137 | const char *what; |
15ae422b | 138 | int r; |
15ae422b | 139 | |
c17ec25e | 140 | assert(m); |
15ae422b | 141 | |
c17ec25e | 142 | switch (m->mode) { |
15ae422b LP |
143 | |
144 | case INACCESSIBLE: | |
c17ec25e | 145 | what = "/run/systemd/inaccessible"; |
15ae422b LP |
146 | break; |
147 | ||
148 | case READONLY: | |
15ae422b | 149 | case READWRITE: |
c17ec25e | 150 | what = m->path; |
15ae422b LP |
151 | break; |
152 | ||
ac0930c8 LP |
153 | case PRIVATE_TMP: |
154 | what = tmp_dir; | |
155 | break; | |
156 | ||
157 | case PRIVATE_VAR_TMP: | |
158 | what = var_tmp_dir; | |
15ae422b | 159 | break; |
e364ad06 LP |
160 | |
161 | default: | |
162 | assert_not_reached("Unknown mode"); | |
15ae422b LP |
163 | } |
164 | ||
ac0930c8 | 165 | assert(what); |
15ae422b | 166 | |
c17ec25e | 167 | r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL); |
ac0930c8 | 168 | if (r >= 0) |
c17ec25e | 169 | log_debug("Successfully mounted %s to %s", what, m->path); |
ea92ae33 MW |
170 | else if (m->ignore && errno == ENOENT) |
171 | r = 0; | |
15ae422b | 172 | |
ac0930c8 LP |
173 | return r; |
174 | } | |
15ae422b | 175 | |
c17ec25e | 176 | static int make_read_only(BindMount *m) { |
ac0930c8 | 177 | int r; |
15ae422b | 178 | |
c17ec25e | 179 | assert(m); |
ac0930c8 | 180 | |
c17ec25e | 181 | if (m->mode != INACCESSIBLE && m->mode != READONLY) |
ac0930c8 LP |
182 | return 0; |
183 | ||
c17ec25e | 184 | r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL); |
ea92ae33 | 185 | if (r < 0 && !(m->ignore && errno == ENOENT)) |
ac0930c8 LP |
186 | return -errno; |
187 | ||
188 | return 0; | |
15ae422b LP |
189 | } |
190 | ||
613b411c LP |
191 | int setup_namespace( |
192 | char** read_write_dirs, | |
193 | char** read_only_dirs, | |
194 | char** inaccessible_dirs, | |
195 | char* tmp_dir, | |
196 | char* var_tmp_dir, | |
197 | unsigned mount_flags) { | |
15ae422b | 198 | |
7ff7394d | 199 | BindMount *m, *mounts = NULL; |
613b411c | 200 | unsigned n; |
c17ec25e | 201 | int r = 0; |
15ae422b | 202 | |
613b411c | 203 | if (mount_flags == 0) |
c17ec25e | 204 | mount_flags = MS_SHARED; |
ac0930c8 | 205 | |
d5a3f0ea ZJS |
206 | if (unshare(CLONE_NEWNS) < 0) |
207 | return -errno; | |
15ae422b | 208 | |
613b411c LP |
209 | n = !!tmp_dir + !!var_tmp_dir + |
210 | strv_length(read_write_dirs) + | |
211 | strv_length(read_only_dirs) + | |
212 | strv_length(inaccessible_dirs); | |
213 | ||
214 | if (n > 0) { | |
7ff7394d | 215 | m = mounts = (BindMount *) alloca(n * sizeof(BindMount)); |
613b411c LP |
216 | r = append_mounts(&m, read_write_dirs, READWRITE); |
217 | if (r < 0) | |
218 | return r; | |
219 | ||
220 | r = append_mounts(&m, read_only_dirs, READONLY); | |
221 | if (r < 0) | |
222 | return r; | |
223 | ||
224 | r = append_mounts(&m, inaccessible_dirs, INACCESSIBLE); | |
225 | if (r < 0) | |
7ff7394d ZJS |
226 | return r; |
227 | ||
613b411c | 228 | if (tmp_dir) { |
7ff7394d ZJS |
229 | m->path = "/tmp"; |
230 | m->mode = PRIVATE_TMP; | |
231 | m++; | |
613b411c | 232 | } |
7ff7394d | 233 | |
613b411c | 234 | if (var_tmp_dir) { |
7ff7394d ZJS |
235 | m->path = "/var/tmp"; |
236 | m->mode = PRIVATE_VAR_TMP; | |
237 | m++; | |
238 | } | |
ac0930c8 | 239 | |
7ff7394d | 240 | assert(mounts + n == m); |
ac0930c8 | 241 | |
7ff7394d ZJS |
242 | qsort(mounts, n, sizeof(BindMount), mount_path_compare); |
243 | drop_duplicates(mounts, &n); | |
15ae422b LP |
244 | } |
245 | ||
ac0930c8 | 246 | /* Remount / as SLAVE so that nothing now mounted in the namespace |
dc4b0200 | 247 | shows up in the parent */ |
d5a3f0ea ZJS |
248 | if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) |
249 | return -errno; | |
15ae422b | 250 | |
c17ec25e MS |
251 | for (m = mounts; m < mounts + n; ++m) { |
252 | r = apply_mount(m, tmp_dir, var_tmp_dir); | |
c1d70f7c | 253 | if (r < 0) |
613b411c | 254 | goto fail; |
c1d70f7c | 255 | } |
15ae422b | 256 | |
c17ec25e MS |
257 | for (m = mounts; m < mounts + n; ++m) { |
258 | r = make_read_only(m); | |
ac0930c8 | 259 | if (r < 0) |
613b411c | 260 | goto fail; |
15ae422b LP |
261 | } |
262 | ||
ac0930c8 | 263 | /* Remount / as the desired mode */ |
c17ec25e | 264 | if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) { |
15ae422b | 265 | r = -errno; |
613b411c | 266 | goto fail; |
15ae422b LP |
267 | } |
268 | ||
15ae422b LP |
269 | return 0; |
270 | ||
613b411c LP |
271 | fail: |
272 | for (m = mounts; m < mounts + n; ++m) | |
c17ec25e MS |
273 | if (m->done) |
274 | umount2(m->path, MNT_DETACH); | |
613b411c LP |
275 | |
276 | return r; | |
277 | } | |
278 | ||
279 | static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) { | |
280 | _cleanup_free_ char *x = NULL; | |
281 | ||
282 | assert(id); | |
283 | assert(prefix); | |
284 | assert(path); | |
285 | ||
286 | x = strjoin(prefix, "/systemd-", id, "-XXXXXX", NULL); | |
287 | if (!x) | |
288 | return -ENOMEM; | |
289 | ||
290 | RUN_WITH_UMASK(0077) | |
291 | if (!mkdtemp(x)) | |
292 | return -errno; | |
293 | ||
294 | RUN_WITH_UMASK(0000) { | |
295 | char *y; | |
296 | ||
297 | y = strappenda(x, "/tmp"); | |
298 | ||
299 | if (mkdir(y, 0777 | S_ISVTX) < 0) | |
300 | return -errno; | |
c17ec25e | 301 | } |
15ae422b | 302 | |
613b411c LP |
303 | *path = x; |
304 | x = NULL; | |
305 | ||
306 | return 0; | |
307 | } | |
308 | ||
/* Creates the two private temporary directories for id, one below /tmp
 * and one below /var/tmp.
 *
 * On success returns 0 and stores the newly allocated paths in *tmp_dir
 * and *var_tmp_dir. On failure returns a negative errno and leaves both
 * output parameters untouched; if only the second directory could not
 * be set up, the first one is removed again. */
int setup_tmp_dirs(const char *id, char **tmp_dir, char **var_tmp_dir) {
        char *private_tmp, *private_var_tmp, *inner;
        int r;

        assert(id);
        assert(tmp_dir);
        assert(var_tmp_dir);

        r = setup_one_tmp_dir(id, "/tmp", &private_tmp);
        if (r < 0)
                return r;

        r = setup_one_tmp_dir(id, "/var/tmp", &private_var_tmp);
        if (r >= 0) {
                *tmp_dir = private_tmp;
                *var_tmp_dir = private_var_tmp;

                return 0;
        }

        /* Roll back the first directory: the inner "tmp" has to go
         * before its parent can be removed */
        inner = strappenda(private_tmp, "/tmp");
        rmdir(inner);
        rmdir(private_tmp);

        free(private_tmp);
        return r;
}
338 | ||
339 | int setup_netns(int netns_storage_socket[2]) { | |
340 | _cleanup_close_ int netns = -1; | |
341 | union { | |
342 | struct cmsghdr cmsghdr; | |
343 | uint8_t buf[CMSG_SPACE(sizeof(int))]; | |
344 | } control = {}; | |
345 | struct msghdr mh = { | |
346 | .msg_control = &control, | |
347 | .msg_controllen = sizeof(control), | |
348 | }; | |
349 | struct cmsghdr *cmsg; | |
350 | int r; | |
351 | ||
352 | assert(netns_storage_socket); | |
353 | assert(netns_storage_socket[0] >= 0); | |
354 | assert(netns_storage_socket[1] >= 0); | |
355 | ||
356 | /* We use the passed socketpair as a storage buffer for our | |
357 | * namespace socket. Whatever process runs this first shall | |
358 | * create a new namespace, all others should just join it. To | |
359 | * serialize that we use a file lock on the socket pair. | |
360 | * | |
361 | * It's a bit crazy, but hey, works great! */ | |
362 | ||
363 | if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0) | |
364 | return -errno; | |
365 | ||
366 | if (recvmsg(netns_storage_socket[0], &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) < 0) { | |
367 | if (errno != EAGAIN) { | |
368 | r = -errno; | |
369 | goto fail; | |
370 | } | |
371 | ||
372 | /* Nothing stored yet, so let's create a new namespace */ | |
373 | ||
374 | if (unshare(CLONE_NEWNET) < 0) { | |
375 | r = -errno; | |
376 | goto fail; | |
377 | } | |
378 | ||
379 | loopback_setup(); | |
380 | ||
381 | netns = open("/proc/self/ns/net", O_RDONLY|O_CLOEXEC|O_NOCTTY); | |
382 | if (netns < 0) { | |
383 | r = -errno; | |
384 | goto fail; | |
385 | } | |
386 | ||
387 | r = 1; | |
388 | } else { | |
389 | /* Yay, found something, so let's join the namespace */ | |
390 | ||
391 | for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg)) { | |
392 | if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { | |
393 | assert(cmsg->cmsg_len == CMSG_LEN(sizeof(int))); | |
394 | netns = *(int*) CMSG_DATA(cmsg); | |
395 | } | |
396 | } | |
397 | ||
398 | if (setns(netns, CLONE_NEWNET) < 0) { | |
399 | r = -errno; | |
400 | goto fail; | |
401 | } | |
402 | ||
403 | r = 0; | |
404 | } | |
405 | ||
406 | cmsg = CMSG_FIRSTHDR(&mh); | |
407 | cmsg->cmsg_level = SOL_SOCKET; | |
408 | cmsg->cmsg_type = SCM_RIGHTS; | |
409 | cmsg->cmsg_len = CMSG_LEN(sizeof(int)); | |
410 | memcpy(CMSG_DATA(cmsg), &netns, sizeof(int)); | |
411 | mh.msg_controllen = cmsg->cmsg_len; | |
412 | ||
413 | if (sendmsg(netns_storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL) < 0) { | |
414 | r = -errno; | |
415 | goto fail; | |
416 | } | |
417 | ||
418 | fail: | |
419 | lockf(netns_storage_socket[0], F_ULOCK, 0); | |
420 | ||
15ae422b LP |
421 | return r; |
422 | } |