]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
e83bebef | 2 | |
4f5dd394 | 3 | #include <sys/mount.h> |
07630cea | 4 | #include <linux/magic.h> |
e83bebef | 5 | |
b5efdb8a | 6 | #include "alloc-util.h" |
4f5dd394 | 7 | #include "escape.h" |
0996ef00 CB |
8 | #include "fd-util.h" |
9 | #include "fileio.h" | |
f4f15635 | 10 | #include "fs-util.h" |
e83bebef | 11 | #include "label.h" |
4f5dd394 | 12 | #include "mkdir.h" |
4349cd7c | 13 | #include "mount-util.h" |
6bedfcbb LP |
14 | #include "nspawn-mount.h" |
15 | #include "parse-util.h" | |
4f5dd394 LP |
16 | #include "path-util.h" |
17 | #include "rm-rf.h" | |
e83bebef | 18 | #include "set.h" |
8fcde012 | 19 | #include "stat-util.h" |
07630cea | 20 | #include "string-util.h" |
4f5dd394 | 21 | #include "strv.h" |
ee104e11 | 22 | #include "user-util.h" |
4f5dd394 | 23 | #include "util.h" |
e83bebef | 24 | |
88614c8a | 25 | CustomMount* custom_mount_add(CustomMount **l, size_t *n, CustomMountType t) { |
e83bebef LP |
26 | CustomMount *c, *ret; |
27 | ||
28 | assert(l); | |
29 | assert(n); | |
30 | assert(t >= 0); | |
31 | assert(t < _CUSTOM_MOUNT_TYPE_MAX); | |
32 | ||
aa484f35 | 33 | c = reallocarray(*l, *n + 1, sizeof(CustomMount)); |
e83bebef LP |
34 | if (!c) |
35 | return NULL; | |
36 | ||
37 | *l = c; | |
38 | ret = *l + *n; | |
39 | (*n)++; | |
40 | ||
41 | *ret = (CustomMount) { .type = t }; | |
42 | ||
43 | return ret; | |
44 | } | |
45 | ||
88614c8a LP |
46 | void custom_mount_free_all(CustomMount *l, size_t n) { |
47 | size_t i; | |
e83bebef LP |
48 | |
49 | for (i = 0; i < n; i++) { | |
50 | CustomMount *m = l + i; | |
51 | ||
52 | free(m->source); | |
53 | free(m->destination); | |
54 | free(m->options); | |
55 | ||
56 | if (m->work_dir) { | |
57 | (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL); | |
58 | free(m->work_dir); | |
59 | } | |
60 | ||
c7a4890c LP |
61 | if (m->rm_rf_tmpdir) { |
62 | (void) rm_rf(m->rm_rf_tmpdir, REMOVE_ROOT|REMOVE_PHYSICAL); | |
63 | free(m->rm_rf_tmpdir); | |
64 | } | |
65 | ||
e83bebef LP |
66 | strv_free(m->lower); |
67 | } | |
68 | ||
69 | free(l); | |
70 | } | |
71 | ||
93bab288 | 72 | static int custom_mount_compare(const CustomMount *a, const CustomMount *b) { |
e83bebef LP |
73 | int r; |
74 | ||
93bab288 | 75 | r = path_compare(a->destination, b->destination); |
e83bebef LP |
76 | if (r != 0) |
77 | return r; | |
78 | ||
93bab288 | 79 | return CMP(a->type, b->type); |
e83bebef LP |
80 | } |
81 | ||
86c0dd4a LP |
/* A source path is acceptable if it is absolute, optionally preceded by a single '+' marker. */
static bool source_path_is_valid(const char *p) {
        assert(p);

        return path_is_absolute(p[0] == '+' ? p + 1 : p);
}
90 | ||
/* Returns a newly allocated copy of 'source'. If 'source' starts with '+', the remainder is prefixed with
 * 'dest' instead of being copied as-is. Returns NULL both for a NULL 'source' and on allocation failure. */
static char *resolve_source_path(const char *dest, const char *source) {
        if (!source)
                return NULL;

        return source[0] == '+' ? prefix_root(dest, source + 1) : strdup(source);
}
101 | ||
88614c8a LP |
102 | int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n) { |
103 | size_t i; | |
86c0dd4a LP |
104 | int r; |
105 | ||
106 | /* Prepare all custom mounts. This will make source we know all temporary directories. This is called in the | |
107 | * parent process, so that we know the temporary directories to remove on exit before we fork off the | |
108 | * children. */ | |
109 | ||
110 | assert(l || n == 0); | |
111 | ||
112 | /* Order the custom mounts, and make sure we have a working directory */ | |
93bab288 | 113 | typesafe_qsort(l, n, custom_mount_compare); |
86c0dd4a LP |
114 | |
115 | for (i = 0; i < n; i++) { | |
116 | CustomMount *m = l + i; | |
117 | ||
118 | if (m->source) { | |
119 | char *s; | |
120 | ||
121 | s = resolve_source_path(dest, m->source); | |
122 | if (!s) | |
123 | return log_oom(); | |
124 | ||
10af01a5 | 125 | free_and_replace(m->source, s); |
c7a4890c LP |
126 | } else { |
127 | /* No source specified? In that case, use a throw-away temporary directory in /var/tmp */ | |
128 | ||
129 | m->rm_rf_tmpdir = strdup("/var/tmp/nspawn-temp-XXXXXX"); | |
130 | if (!m->rm_rf_tmpdir) | |
131 | return log_oom(); | |
132 | ||
133 | if (!mkdtemp(m->rm_rf_tmpdir)) { | |
134 | m->rm_rf_tmpdir = mfree(m->rm_rf_tmpdir); | |
135 | return log_error_errno(errno, "Failed to acquire temporary directory: %m"); | |
136 | } | |
137 | ||
138 | m->source = strjoin(m->rm_rf_tmpdir, "/src"); | |
139 | if (!m->source) | |
140 | return log_oom(); | |
141 | ||
142 | if (mkdir(m->source, 0755) < 0) | |
143 | return log_error_errno(errno, "Failed to create %s: %m", m->source); | |
86c0dd4a LP |
144 | } |
145 | ||
146 | if (m->type == CUSTOM_MOUNT_OVERLAY) { | |
147 | char **j; | |
148 | ||
149 | STRV_FOREACH(j, m->lower) { | |
150 | char *s; | |
151 | ||
152 | s = resolve_source_path(dest, *j); | |
153 | if (!s) | |
154 | return log_oom(); | |
155 | ||
10af01a5 | 156 | free_and_replace(*j, s); |
86c0dd4a LP |
157 | } |
158 | ||
159 | if (m->work_dir) { | |
160 | char *s; | |
161 | ||
162 | s = resolve_source_path(dest, m->work_dir); | |
163 | if (!s) | |
164 | return log_oom(); | |
165 | ||
10af01a5 | 166 | free_and_replace(m->work_dir, s); |
86c0dd4a LP |
167 | } else { |
168 | assert(m->source); | |
169 | ||
170 | r = tempfn_random(m->source, NULL, &m->work_dir); | |
171 | if (r < 0) | |
172 | return log_error_errno(r, "Failed to acquire working directory: %m"); | |
173 | } | |
174 | ||
175 | (void) mkdir_label(m->work_dir, 0700); | |
176 | } | |
177 | } | |
178 | ||
179 | return 0; | |
180 | } | |
181 | ||
88614c8a | 182 | int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) { |
e83bebef LP |
183 | _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL; |
184 | const char *p = s; | |
185 | CustomMount *m; | |
186 | int r; | |
187 | ||
188 | assert(l); | |
189 | assert(n); | |
190 | ||
191 | r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL); | |
192 | if (r < 0) | |
193 | return r; | |
194 | if (r == 0) | |
195 | return -EINVAL; | |
e83bebef | 196 | if (r == 1) { |
86c0dd4a | 197 | destination = strdup(source[0] == '+' ? source+1 : source); |
e83bebef LP |
198 | if (!destination) |
199 | return -ENOMEM; | |
200 | } | |
e83bebef LP |
201 | if (r == 2 && !isempty(p)) { |
202 | opts = strdup(p); | |
203 | if (!opts) | |
204 | return -ENOMEM; | |
205 | } | |
206 | ||
c7a4890c LP |
207 | if (isempty(source)) |
208 | source = NULL; | |
209 | else if (!source_path_is_valid(source)) | |
e83bebef | 210 | return -EINVAL; |
c7a4890c | 211 | |
e83bebef LP |
212 | if (!path_is_absolute(destination)) |
213 | return -EINVAL; | |
214 | ||
215 | m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND); | |
216 | if (!m) | |
48cbe5f8 | 217 | return -ENOMEM; |
e83bebef LP |
218 | |
219 | m->source = source; | |
220 | m->destination = destination; | |
221 | m->read_only = read_only; | |
222 | m->options = opts; | |
223 | ||
224 | source = destination = opts = NULL; | |
225 | return 0; | |
226 | } | |
227 | ||
88614c8a | 228 | int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s) { |
e83bebef LP |
229 | _cleanup_free_ char *path = NULL, *opts = NULL; |
230 | const char *p = s; | |
231 | CustomMount *m; | |
232 | int r; | |
233 | ||
234 | assert(l); | |
235 | assert(n); | |
236 | assert(s); | |
237 | ||
238 | r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS); | |
239 | if (r < 0) | |
240 | return r; | |
241 | if (r == 0) | |
242 | return -EINVAL; | |
243 | ||
244 | if (isempty(p)) | |
245 | opts = strdup("mode=0755"); | |
246 | else | |
247 | opts = strdup(p); | |
248 | if (!opts) | |
249 | return -ENOMEM; | |
250 | ||
251 | if (!path_is_absolute(path)) | |
252 | return -EINVAL; | |
253 | ||
254 | m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS); | |
255 | if (!m) | |
256 | return -ENOMEM; | |
257 | ||
1cc6c93a YW |
258 | m->destination = TAKE_PTR(path); |
259 | m->options = TAKE_PTR(opts); | |
e83bebef | 260 | |
e83bebef LP |
261 | return 0; |
262 | } | |
263 | ||
88614c8a | 264 | int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) { |
ad85779a LP |
265 | _cleanup_free_ char *upper = NULL, *destination = NULL; |
266 | _cleanup_strv_free_ char **lower = NULL; | |
267 | CustomMount *m; | |
86c0dd4a | 268 | int k; |
ad85779a | 269 | |
86c0dd4a LP |
270 | k = strv_split_extract(&lower, s, ":", EXTRACT_DONT_COALESCE_SEPARATORS); |
271 | if (k < 0) | |
272 | return k; | |
ad85779a LP |
273 | if (k < 2) |
274 | return -EADDRNOTAVAIL; | |
275 | if (k == 2) { | |
86c0dd4a LP |
276 | /* If two parameters are specified, the first one is the lower, the second one the upper directory. And |
277 | * we'll also define the destination mount point the same as the upper. */ | |
278 | ||
279 | if (!source_path_is_valid(lower[0]) || | |
280 | !source_path_is_valid(lower[1])) | |
281 | return -EINVAL; | |
282 | ||
ae2a15bc | 283 | upper = TAKE_PTR(lower[1]); |
ad85779a | 284 | |
86c0dd4a | 285 | destination = strdup(upper[0] == '+' ? upper+1 : upper); /* take the destination without "+" prefix */ |
ad85779a LP |
286 | if (!destination) |
287 | return -ENOMEM; | |
ad85779a | 288 | } else { |
c7a4890c | 289 | char **i; |
86c0dd4a LP |
290 | |
291 | /* If more than two parameters are specified, the last one is the destination, the second to last one | |
292 | * the "upper", and all before that the "lower" directories. */ | |
293 | ||
ad85779a | 294 | destination = lower[k - 1]; |
ae2a15bc | 295 | upper = TAKE_PTR(lower[k - 2]); |
86c0dd4a | 296 | |
c7a4890c LP |
297 | STRV_FOREACH(i, lower) |
298 | if (!source_path_is_valid(*i)) | |
299 | return -EINVAL; | |
300 | ||
301 | /* If the upper directory is unspecified, then let's create it automatically as a throw-away directory | |
302 | * in /var/tmp */ | |
303 | if (isempty(upper)) | |
304 | upper = NULL; | |
305 | else if (!source_path_is_valid(upper)) | |
306 | return -EINVAL; | |
307 | ||
86c0dd4a LP |
308 | if (!path_is_absolute(destination)) |
309 | return -EINVAL; | |
ad85779a LP |
310 | } |
311 | ||
312 | m = custom_mount_add(l, n, CUSTOM_MOUNT_OVERLAY); | |
313 | if (!m) | |
314 | return -ENOMEM; | |
315 | ||
1cc6c93a YW |
316 | m->destination = TAKE_PTR(destination); |
317 | m->source = TAKE_PTR(upper); | |
318 | m->lower = TAKE_PTR(lower); | |
ad85779a LP |
319 | m->read_only = read_only; |
320 | ||
ad85779a LP |
321 | return 0; |
322 | } | |
323 | ||
04029482 | 324 | int tmpfs_patch_options( |
e83bebef | 325 | const char *options, |
2fa017f1 | 326 | uid_t uid_shift, |
e83bebef LP |
327 | const char *selinux_apifs_context, |
328 | char **ret) { | |
329 | ||
330 | char *buf = NULL; | |
331 | ||
2fa017f1 | 332 | if (uid_shift != UID_INVALID) { |
9aa2169e | 333 | if (asprintf(&buf, "%s%suid=" UID_FMT ",gid=" UID_FMT, |
87e4e28d | 334 | strempty(options), options ? "," : "", |
9aa2169e | 335 | uid_shift, uid_shift) < 0) |
e83bebef LP |
336 | return -ENOMEM; |
337 | ||
338 | options = buf; | |
339 | } | |
340 | ||
349cc4a5 | 341 | #if HAVE_SELINUX |
e83bebef LP |
342 | if (selinux_apifs_context) { |
343 | char *t; | |
344 | ||
87e4e28d | 345 | t = strjoin(strempty(options), options ? "," : "", |
9aa2169e ZJS |
346 | "context=\"", selinux_apifs_context, "\""); |
347 | free(buf); | |
348 | if (!t) | |
e83bebef | 349 | return -ENOMEM; |
e83bebef | 350 | |
e83bebef LP |
351 | buf = t; |
352 | } | |
353 | #endif | |
354 | ||
0996ef00 CB |
355 | if (!buf && options) { |
356 | buf = strdup(options); | |
357 | if (!buf) | |
358 | return -ENOMEM; | |
359 | } | |
e83bebef | 360 | *ret = buf; |
0996ef00 | 361 | |
e83bebef LP |
362 | return !!buf; |
363 | } | |
364 | ||
4f086aab | 365 | int mount_sysfs(const char *dest, MountSettingsMask mount_settings) { |
d8fc6a00 | 366 | const char *full, *top, *x; |
d1678248 | 367 | int r; |
4f086aab | 368 | unsigned long extra_flags = 0; |
d8fc6a00 LP |
369 | |
370 | top = prefix_roota(dest, "/sys"); | |
40fd52f2 | 371 | r = path_is_fs_type(top, SYSFS_MAGIC); |
d1678248 ILG |
372 | if (r < 0) |
373 | return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top); | |
374 | /* /sys might already be mounted as sysfs by the outer child in the | |
375 | * !netns case. In this case, it's all good. Don't touch it because we | |
376 | * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555. | |
377 | */ | |
378 | if (r > 0) | |
379 | return 0; | |
380 | ||
d8fc6a00 LP |
381 | full = prefix_roota(top, "/full"); |
382 | ||
383 | (void) mkdir(full, 0755); | |
384 | ||
4f086aab SU |
385 | if (mount_settings & MOUNT_APPLY_APIVFS_RO) |
386 | extra_flags |= MS_RDONLY; | |
387 | ||
60e76d48 | 388 | r = mount_verbose(LOG_ERR, "sysfs", full, "sysfs", |
4f086aab | 389 | MS_NOSUID|MS_NOEXEC|MS_NODEV|extra_flags, NULL); |
60e76d48 ZJS |
390 | if (r < 0) |
391 | return r; | |
d8fc6a00 LP |
392 | |
393 | FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") { | |
394 | _cleanup_free_ char *from = NULL, *to = NULL; | |
395 | ||
396 | from = prefix_root(full, x); | |
397 | if (!from) | |
398 | return log_oom(); | |
399 | ||
400 | to = prefix_root(top, x); | |
401 | if (!to) | |
402 | return log_oom(); | |
403 | ||
404 | (void) mkdir(to, 0755); | |
405 | ||
60e76d48 ZJS |
406 | r = mount_verbose(LOG_ERR, from, to, NULL, MS_BIND, NULL); |
407 | if (r < 0) | |
408 | return r; | |
d8fc6a00 | 409 | |
60e76d48 | 410 | r = mount_verbose(LOG_ERR, NULL, to, NULL, |
4f086aab | 411 | MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL); |
60e76d48 ZJS |
412 | if (r < 0) |
413 | return r; | |
d8fc6a00 LP |
414 | } |
415 | ||
60e76d48 ZJS |
416 | r = umount_verbose(full); |
417 | if (r < 0) | |
418 | return r; | |
d8fc6a00 LP |
419 | |
420 | if (rmdir(full) < 0) | |
421 | return log_error_errno(errno, "Failed to remove %s: %m", full); | |
422 | ||
0996ef00 CB |
423 | /* Create mountpoint for cgroups. Otherwise we are not allowed since we |
424 | * remount /sys read-only. | |
425 | */ | |
677a72cd LS |
426 | x = prefix_roota(top, "/fs/cgroup"); |
427 | (void) mkdir_p(x, 0755); | |
d8fc6a00 | 428 | |
60e76d48 | 429 | return mount_verbose(LOG_ERR, NULL, top, NULL, |
4f086aab | 430 | MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL); |
d8fc6a00 LP |
431 | } |
432 | ||
9c0fad5f | 433 | static int mkdir_userns(const char *path, mode_t mode, uid_t uid_shift) { |
63eae723 EV |
434 | int r; |
435 | ||
436 | assert(path); | |
437 | ||
dae8b82e ZJS |
438 | r = mkdir_errno_wrapper(path, mode); |
439 | if (r < 0 && r != -EEXIST) | |
440 | return r; | |
63eae723 | 441 | |
9c0fad5f | 442 | if (uid_shift == UID_INVALID) |
acbbf69b LP |
443 | return 0; |
444 | ||
dae8b82e | 445 | if (lchown(path, uid_shift, uid_shift) < 0) |
acbbf69b | 446 | return -errno; |
63eae723 EV |
447 | |
448 | return 0; | |
449 | } | |
450 | ||
/* Like mkdir_userns(), but also creates all missing parent directories of 'path'. If 'prefix' is given,
 * 'path' must start with it (otherwise -ENOTDIR is returned), and parent components that are themselves
 * a leading part of 'prefix' are skipped. Returns 0 on success, a negative errno-style value on failure. */
static int mkdir_userns_p(const char *prefix, const char *path, mode_t mode, uid_t uid_shift) {
        const char *cursor, *component_end;
        int r;

        assert(path);

        if (prefix && !path_startswith(path, prefix))
                return -ENOTDIR;

        /* Scratch buffer for every leading portion of 'path'; none can be longer than 'path' itself */
        char partial[strlen(path) + 1];

        /* create every parent directory in the path, except the last component */
        cursor = path + strspn(path, "/");
        for (;;) {
                size_t len;

                component_end = cursor + strcspn(cursor, "/");
                cursor = component_end + strspn(component_end, "/");

                /* Is this the last component? If so, then we're done */
                if (*cursor == '\0')
                        break;

                len = component_end - path;
                memcpy(partial, path, len);
                partial[len] = '\0';

                if (prefix && path_startswith(prefix, partial))
                        continue;

                r = mkdir_userns(partial, mode, uid_shift);
                if (r < 0)
                        return r;
        }

        return mkdir_userns(path, mode, uid_shift);
}
485 | ||
e83bebef | 486 | int mount_all(const char *dest, |
4f086aab | 487 | MountSettingsMask mount_settings, |
2fa017f1 | 488 | uid_t uid_shift, |
e83bebef LP |
489 | const char *selinux_apifs_context) { |
490 | ||
d4b653c5 LP |
491 | #define PROC_INACCESSIBLE(path) \ |
492 | { NULL, (path), NULL, NULL, MS_BIND, \ | |
493 | MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_INACCESSIBLE_REG }, /* Bind mount first ... */ \ | |
494 | { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \ | |
495 | MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */ | |
496 | ||
497 | #define PROC_READ_ONLY(path) \ | |
498 | { (path), (path), NULL, NULL, MS_BIND, \ | |
499 | MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \ | |
500 | { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \ | |
501 | MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */ | |
502 | ||
e83bebef LP |
503 | typedef struct MountPoint { |
504 | const char *what; | |
505 | const char *where; | |
506 | const char *type; | |
507 | const char *options; | |
508 | unsigned long flags; | |
4f086aab | 509 | MountSettingsMask mount_settings; |
e83bebef LP |
510 | } MountPoint; |
511 | ||
512 | static const MountPoint mount_table[] = { | |
d4b653c5 LP |
513 | /* First we list inner child mounts (i.e. mounts applied *after* entering user namespacing) */ |
514 | { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, | |
515 | MOUNT_FATAL|MOUNT_IN_USERNS }, | |
516 | ||
517 | { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, | |
518 | MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ | |
519 | ||
520 | { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND, | |
521 | MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS }, /* (except for this) */ | |
522 | ||
523 | { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, | |
524 | MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* ... then, make it r/o */ | |
525 | ||
526 | /* Make these files inaccessible to container payloads: they potentially leak information about kernel | |
527 | * internals or the host's execution environment to the container */ | |
528 | PROC_INACCESSIBLE("/proc/kallsyms"), | |
529 | PROC_INACCESSIBLE("/proc/kcore"), | |
530 | PROC_INACCESSIBLE("/proc/keys"), | |
531 | PROC_INACCESSIBLE("/proc/sysrq-trigger"), | |
532 | PROC_INACCESSIBLE("/proc/timer_list"), | |
533 | ||
534 | /* Make these directories read-only to container payloads: they show hardware information, and in some | |
535 | * cases contain tunables the container really shouldn't have access to. */ | |
536 | PROC_READ_ONLY("/proc/acpi"), | |
537 | PROC_READ_ONLY("/proc/apm"), | |
538 | PROC_READ_ONLY("/proc/asound"), | |
539 | PROC_READ_ONLY("/proc/bus"), | |
540 | PROC_READ_ONLY("/proc/fs"), | |
541 | PROC_READ_ONLY("/proc/irq"), | |
542 | PROC_READ_ONLY("/proc/scsi"), | |
543 | ||
544 | /* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing) */ | |
545 | { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, | |
1099ceeb | 546 | MOUNT_FATAL|MOUNT_APPLY_TMPFS_TMP }, |
03d0f4b5 | 547 | { "tmpfs", "/sys", "tmpfs", "mode=555", MS_NOSUID|MS_NOEXEC|MS_NODEV, |
d4b653c5 LP |
548 | MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS }, |
549 | { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, | |
550 | MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO }, /* skipped if above was mounted */ | |
551 | { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, | |
552 | MOUNT_FATAL }, /* skipped if above was mounted */ | |
553 | { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, | |
554 | MOUNT_FATAL }, | |
555 | { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, | |
556 | MOUNT_FATAL }, | |
557 | { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, | |
558 | MOUNT_FATAL }, | |
559 | ||
349cc4a5 | 560 | #if HAVE_SELINUX |
d4b653c5 LP |
561 | { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, |
562 | 0 }, /* Bind mount first */ | |
563 | { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, | |
564 | 0 }, /* Then, make it r/o */ | |
e83bebef LP |
565 | #endif |
566 | }; | |
567 | ||
d4b653c5 | 568 | _cleanup_(unlink_and_freep) char *inaccessible = NULL; |
4f086aab SU |
569 | bool use_userns = (mount_settings & MOUNT_USE_USERNS); |
570 | bool netns = (mount_settings & MOUNT_APPLY_APIVFS_NETNS); | |
571 | bool ro = (mount_settings & MOUNT_APPLY_APIVFS_RO); | |
572 | bool in_userns = (mount_settings & MOUNT_IN_USERNS); | |
1099ceeb | 573 | bool tmpfs_tmp = (mount_settings & MOUNT_APPLY_TMPFS_TMP); |
d4b653c5 | 574 | size_t k; |
88614c8a | 575 | int r; |
e83bebef LP |
576 | |
577 | for (k = 0; k < ELEMENTSOF(mount_table); k++) { | |
578 | _cleanup_free_ char *where = NULL, *options = NULL; | |
d4b653c5 | 579 | const char *o, *what; |
4f086aab SU |
580 | bool fatal = (mount_table[k].mount_settings & MOUNT_FATAL); |
581 | ||
582 | if (in_userns != (bool)(mount_table[k].mount_settings & MOUNT_IN_USERNS)) | |
583 | continue; | |
e83bebef | 584 | |
4f086aab | 585 | if (!netns && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_NETNS)) |
d1678248 ILG |
586 | continue; |
587 | ||
4f086aab | 588 | if (!ro && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_RO)) |
e83bebef LP |
589 | continue; |
590 | ||
1099ceeb LP |
591 | if (!tmpfs_tmp && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_TMPFS_TMP)) |
592 | continue; | |
593 | ||
cb638b5e | 594 | r = chase_symlinks(mount_table[k].where, dest, CHASE_NONEXISTENT|CHASE_PREFIX_ROOT, &where); |
8ce48cf0 | 595 | if (r < 0) |
ec57bd42 | 596 | return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, mount_table[k].where); |
e83bebef | 597 | |
d4b653c5 LP |
598 | if (mount_table[k].mount_settings & MOUNT_INACCESSIBLE_REG) { |
599 | ||
600 | if (!inaccessible) { | |
601 | _cleanup_free_ char *np = NULL; | |
602 | ||
603 | r = tempfn_random_child(NULL, "inaccessible", &np); | |
604 | if (r < 0) | |
605 | return log_error_errno(r, "Failed to generate inaccessible file node path: %m"); | |
606 | ||
607 | r = touch_file(np, false, USEC_INFINITY, UID_INVALID, GID_INVALID, 0000); | |
608 | if (r < 0) | |
609 | return log_error_errno(r, "Failed to create inaccessible file node '%s': %m", np); | |
610 | ||
611 | inaccessible = TAKE_PTR(np); | |
612 | } | |
613 | ||
614 | what = inaccessible; | |
615 | } else | |
616 | what = mount_table[k].what; | |
617 | ||
8ce48cf0 | 618 | r = path_is_mount_point(where, NULL, 0); |
e83bebef LP |
619 | if (r < 0 && r != -ENOENT) |
620 | return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where); | |
621 | ||
622 | /* Skip this entry if it is not a remount. */ | |
d4b653c5 | 623 | if (what && r > 0) |
e83bebef LP |
624 | continue; |
625 | ||
9c0fad5f | 626 | r = mkdir_userns_p(dest, where, 0755, (use_userns && !in_userns) ? uid_shift : UID_INVALID); |
920a7899 | 627 | if (r < 0 && r != -EEXIST) { |
4f13e534 | 628 | if (fatal && r != -EROFS) |
e83bebef LP |
629 | return log_error_errno(r, "Failed to create directory %s: %m", where); |
630 | ||
201b13c8 | 631 | log_debug_errno(r, "Failed to create directory %s: %m", where); |
4f13e534 LT |
632 | /* If we failed mkdir() or chown() due to the root |
633 | * directory being read only, attempt to mount this fs | |
634 | * anyway and let mount_verbose log any errors */ | |
635 | if (r != -EROFS) | |
636 | continue; | |
e83bebef LP |
637 | } |
638 | ||
639 | o = mount_table[k].options; | |
640 | if (streq_ptr(mount_table[k].type, "tmpfs")) { | |
2fa017f1 | 641 | r = tmpfs_patch_options(o, in_userns ? 0 : uid_shift, selinux_apifs_context, &options); |
e83bebef LP |
642 | if (r < 0) |
643 | return log_oom(); | |
644 | if (r > 0) | |
645 | o = options; | |
646 | } | |
647 | ||
4f086aab | 648 | r = mount_verbose(fatal ? LOG_ERR : LOG_DEBUG, |
d4b653c5 | 649 | what, |
60e76d48 ZJS |
650 | where, |
651 | mount_table[k].type, | |
652 | mount_table[k].flags, | |
653 | o); | |
4f086aab | 654 | if (r < 0 && fatal) |
60e76d48 | 655 | return r; |
e83bebef LP |
656 | } |
657 | ||
658 | return 0; | |
659 | } | |
660 | ||
e83bebef | 661 | static int mount_bind(const char *dest, CustomMount *m) { |
68cf43c3 | 662 | |
72d967df | 663 | _cleanup_free_ char *where = NULL; |
68cf43c3 | 664 | struct stat source_st, dest_st; |
e83bebef LP |
665 | int r; |
666 | ||
86c0dd4a | 667 | assert(dest); |
e83bebef LP |
668 | assert(m); |
669 | ||
e83bebef LP |
670 | if (stat(m->source, &source_st) < 0) |
671 | return log_error_errno(errno, "Failed to stat %s: %m", m->source); | |
672 | ||
cb638b5e | 673 | r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where); |
68cf43c3 | 674 | if (r < 0) |
ec57bd42 | 675 | return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination); |
8ce48cf0 LP |
676 | if (r > 0) { /* Path exists already? */ |
677 | ||
678 | if (stat(where, &dest_st) < 0) | |
679 | return log_error_errno(errno, "Failed to stat %s: %m", where); | |
e83bebef | 680 | |
baaa35ad ZJS |
681 | if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) |
682 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
683 | "Cannot bind mount directory %s on file %s.", | |
684 | m->source, where); | |
685 | ||
686 | if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) | |
687 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), | |
688 | "Cannot bind mount file %s on directory %s.", | |
689 | m->source, where); | |
e83bebef | 690 | |
8ce48cf0 | 691 | } else { /* Path doesn't exist yet? */ |
e83bebef LP |
692 | r = mkdir_parents_label(where, 0755); |
693 | if (r < 0) | |
694 | return log_error_errno(r, "Failed to make parents of %s: %m", where); | |
b97e83cb BN |
695 | |
696 | /* Create the mount point. Any non-directory file can be | |
697 | * mounted on any non-directory file (regular, fifo, socket, | |
698 | * char, block). | |
699 | */ | |
700 | if (S_ISDIR(source_st.st_mode)) | |
701 | r = mkdir_label(where, 0755); | |
702 | else | |
703 | r = touch(where); | |
704 | if (r < 0) | |
705 | return log_error_errno(r, "Failed to create mount point %s: %m", where); | |
706 | ||
8ce48cf0 | 707 | } |
e83bebef | 708 | |
72d967df | 709 | r = mount_verbose(LOG_ERR, m->source, where, NULL, MS_BIND | MS_REC, m->options); |
60e76d48 ZJS |
710 | if (r < 0) |
711 | return r; | |
e83bebef LP |
712 | |
713 | if (m->read_only) { | |
6b7c9f8b | 714 | r = bind_remount_recursive(where, true, NULL); |
e83bebef LP |
715 | if (r < 0) |
716 | return log_error_errno(r, "Read-only bind mount failed: %m"); | |
717 | } | |
718 | ||
719 | return 0; | |
720 | } | |
721 | ||
722 | static int mount_tmpfs( | |
723 | const char *dest, | |
724 | CustomMount *m, | |
725 | bool userns, uid_t uid_shift, uid_t uid_range, | |
726 | const char *selinux_apifs_context) { | |
727 | ||
68cf43c3 LP |
728 | const char *options; |
729 | _cleanup_free_ char *buf = NULL, *where = NULL; | |
e83bebef LP |
730 | int r; |
731 | ||
732 | assert(dest); | |
733 | assert(m); | |
734 | ||
cb638b5e | 735 | r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where); |
68cf43c3 | 736 | if (r < 0) |
ec57bd42 | 737 | return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination); |
8ce48cf0 LP |
738 | if (r == 0) { /* Doesn't exist yet? */ |
739 | r = mkdir_p_label(where, 0755); | |
740 | if (r < 0) | |
741 | return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where); | |
742 | } | |
e83bebef | 743 | |
2fa017f1 | 744 | r = tmpfs_patch_options(m->options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf); |
e83bebef LP |
745 | if (r < 0) |
746 | return log_oom(); | |
747 | options = r > 0 ? buf : m->options; | |
748 | ||
60e76d48 | 749 | return mount_verbose(LOG_ERR, "tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options); |
e83bebef LP |
750 | } |
751 | ||
86c0dd4a | 752 | static char *joined_and_escaped_lower_dirs(char **lower) { |
e83bebef LP |
753 | _cleanup_strv_free_ char **sv = NULL; |
754 | ||
755 | sv = strv_copy(lower); | |
756 | if (!sv) | |
757 | return NULL; | |
758 | ||
759 | strv_reverse(sv); | |
760 | ||
761 | if (!strv_shell_escape(sv, ",:")) | |
762 | return NULL; | |
763 | ||
764 | return strv_join(sv, ":"); | |
765 | } | |
766 | ||
767 | static int mount_overlay(const char *dest, CustomMount *m) { | |
68cf43c3 | 768 | |
86c0dd4a | 769 | _cleanup_free_ char *lower = NULL, *where = NULL, *escaped_source = NULL; |
68cf43c3 | 770 | const char *options; |
e83bebef LP |
771 | int r; |
772 | ||
773 | assert(dest); | |
774 | assert(m); | |
775 | ||
cb638b5e | 776 | r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where); |
68cf43c3 | 777 | if (r < 0) |
ec57bd42 | 778 | return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination); |
8ce48cf0 LP |
779 | if (r == 0) { /* Doesn't exist yet? */ |
780 | r = mkdir_label(where, 0755); | |
781 | if (r < 0) | |
782 | return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where); | |
783 | } | |
e83bebef LP |
784 | |
785 | (void) mkdir_p_label(m->source, 0755); | |
786 | ||
787 | lower = joined_and_escaped_lower_dirs(m->lower); | |
788 | if (!lower) | |
789 | return log_oom(); | |
790 | ||
86c0dd4a LP |
791 | escaped_source = shell_escape(m->source, ",:"); |
792 | if (!escaped_source) | |
793 | return log_oom(); | |
e83bebef | 794 | |
86c0dd4a | 795 | if (m->read_only) |
e83bebef | 796 | options = strjoina("lowerdir=", escaped_source, ":", lower); |
86c0dd4a LP |
797 | else { |
798 | _cleanup_free_ char *escaped_work_dir = NULL; | |
e83bebef | 799 | |
e83bebef LP |
800 | escaped_work_dir = shell_escape(m->work_dir, ",:"); |
801 | if (!escaped_work_dir) | |
802 | return log_oom(); | |
803 | ||
804 | options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir); | |
805 | } | |
806 | ||
60e76d48 | 807 | return mount_verbose(LOG_ERR, "overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options); |
e83bebef LP |
808 | } |
809 | ||
810 | int mount_custom( | |
811 | const char *dest, | |
88614c8a | 812 | CustomMount *mounts, size_t n, |
e83bebef LP |
813 | bool userns, uid_t uid_shift, uid_t uid_range, |
814 | const char *selinux_apifs_context) { | |
815 | ||
88614c8a | 816 | size_t i; |
e83bebef LP |
817 | int r; |
818 | ||
819 | assert(dest); | |
820 | ||
821 | for (i = 0; i < n; i++) { | |
822 | CustomMount *m = mounts + i; | |
823 | ||
824 | switch (m->type) { | |
825 | ||
826 | case CUSTOM_MOUNT_BIND: | |
827 | r = mount_bind(dest, m); | |
828 | break; | |
829 | ||
830 | case CUSTOM_MOUNT_TMPFS: | |
831 | r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context); | |
832 | break; | |
833 | ||
834 | case CUSTOM_MOUNT_OVERLAY: | |
835 | r = mount_overlay(dest, m); | |
836 | break; | |
837 | ||
838 | default: | |
839 | assert_not_reached("Unknown custom mount type"); | |
840 | } | |
841 | ||
842 | if (r < 0) | |
843 | return r; | |
844 | } | |
845 | ||
846 | return 0; | |
847 | } | |
848 | ||
e83bebef LP |
849 | int setup_volatile_state( |
850 | const char *directory, | |
851 | VolatileMode mode, | |
852 | bool userns, uid_t uid_shift, uid_t uid_range, | |
853 | const char *selinux_apifs_context) { | |
854 | ||
855 | _cleanup_free_ char *buf = NULL; | |
856 | const char *p, *options; | |
857 | int r; | |
858 | ||
859 | assert(directory); | |
860 | ||
861 | if (mode != VOLATILE_STATE) | |
862 | return 0; | |
863 | ||
864 | /* --volatile=state means we simply overmount /var | |
865 | with a tmpfs, and the rest read-only. */ | |
866 | ||
6b7c9f8b | 867 | r = bind_remount_recursive(directory, true, NULL); |
e83bebef LP |
868 | if (r < 0) |
869 | return log_error_errno(r, "Failed to remount %s read-only: %m", directory); | |
870 | ||
871 | p = prefix_roota(directory, "/var"); | |
872 | r = mkdir(p, 0755); | |
873 | if (r < 0 && errno != EEXIST) | |
874 | return log_error_errno(errno, "Failed to create %s: %m", directory); | |
875 | ||
876 | options = "mode=755"; | |
2fa017f1 | 877 | r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf); |
e83bebef LP |
878 | if (r < 0) |
879 | return log_oom(); | |
880 | if (r > 0) | |
881 | options = buf; | |
882 | ||
60e76d48 | 883 | return mount_verbose(LOG_ERR, "tmpfs", p, "tmpfs", MS_STRICTATIME, options); |
e83bebef LP |
884 | } |
885 | ||
886 | int setup_volatile( | |
887 | const char *directory, | |
888 | VolatileMode mode, | |
889 | bool userns, uid_t uid_shift, uid_t uid_range, | |
890 | const char *selinux_apifs_context) { | |
891 | ||
892 | bool tmpfs_mounted = false, bind_mounted = false; | |
893 | char template[] = "/tmp/nspawn-volatile-XXXXXX"; | |
894 | _cleanup_free_ char *buf = NULL; | |
895 | const char *f, *t, *options; | |
896 | int r; | |
897 | ||
898 | assert(directory); | |
899 | ||
900 | if (mode != VOLATILE_YES) | |
901 | return 0; | |
902 | ||
903 | /* --volatile=yes means we mount a tmpfs to the root dir, and | |
904 | the original /usr to use inside it, and that read-only. */ | |
905 | ||
906 | if (!mkdtemp(template)) | |
907 | return log_error_errno(errno, "Failed to create temporary directory: %m"); | |
908 | ||
909 | options = "mode=755"; | |
2fa017f1 | 910 | r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf); |
e83bebef LP |
911 | if (r < 0) |
912 | return log_oom(); | |
913 | if (r > 0) | |
914 | options = buf; | |
915 | ||
60e76d48 ZJS |
916 | r = mount_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options); |
917 | if (r < 0) | |
e83bebef | 918 | goto fail; |
e83bebef LP |
919 | |
920 | tmpfs_mounted = true; | |
921 | ||
922 | f = prefix_roota(directory, "/usr"); | |
923 | t = prefix_roota(template, "/usr"); | |
924 | ||
925 | r = mkdir(t, 0755); | |
926 | if (r < 0 && errno != EEXIST) { | |
927 | r = log_error_errno(errno, "Failed to create %s: %m", t); | |
928 | goto fail; | |
929 | } | |
930 | ||
60e76d48 ZJS |
931 | r = mount_verbose(LOG_ERR, f, t, NULL, MS_BIND|MS_REC, NULL); |
932 | if (r < 0) | |
e83bebef | 933 | goto fail; |
e83bebef LP |
934 | |
935 | bind_mounted = true; | |
936 | ||
6b7c9f8b | 937 | r = bind_remount_recursive(t, true, NULL); |
e83bebef LP |
938 | if (r < 0) { |
939 | log_error_errno(r, "Failed to remount %s read-only: %m", t); | |
940 | goto fail; | |
941 | } | |
942 | ||
60e76d48 ZJS |
943 | r = mount_verbose(LOG_ERR, template, directory, NULL, MS_MOVE, NULL); |
944 | if (r < 0) | |
e83bebef | 945 | goto fail; |
e83bebef LP |
946 | |
947 | (void) rmdir(template); | |
948 | ||
949 | return 0; | |
950 | ||
951 | fail: | |
952 | if (bind_mounted) | |
60e76d48 | 953 | (void) umount_verbose(t); |
e83bebef LP |
954 | |
955 | if (tmpfs_mounted) | |
60e76d48 | 956 | (void) umount_verbose(template); |
e83bebef LP |
957 | (void) rmdir(template); |
958 | return r; | |
959 | } | |
b53ede69 PW |
960 | |
961 | /* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. */ | |
962 | int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) { | |
963 | _cleanup_free_ char *root_new = NULL, *root_old = NULL; | |
964 | const char *p = s; | |
965 | int r; | |
966 | ||
967 | assert(pivot_root_new); | |
968 | assert(pivot_root_old); | |
969 | ||
970 | r = extract_first_word(&p, &root_new, ":", EXTRACT_DONT_COALESCE_SEPARATORS); | |
971 | if (r < 0) | |
972 | return r; | |
973 | if (r == 0) | |
974 | return -EINVAL; | |
975 | ||
976 | if (isempty(p)) | |
977 | root_old = NULL; | |
978 | else { | |
979 | root_old = strdup(p); | |
980 | if (!root_old) | |
981 | return -ENOMEM; | |
982 | } | |
983 | ||
984 | if (!path_is_absolute(root_new)) | |
985 | return -EINVAL; | |
986 | if (root_old && !path_is_absolute(root_old)) | |
987 | return -EINVAL; | |
988 | ||
989 | free_and_replace(*pivot_root_new, root_new); | |
990 | free_and_replace(*pivot_root_old, root_old); | |
991 | ||
992 | return 0; | |
993 | } | |
994 | ||
995 | int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old) { | |
996 | _cleanup_free_ char *directory_pivot_root_new = NULL; | |
997 | _cleanup_free_ char *pivot_tmp_pivot_root_old = NULL; | |
998 | char pivot_tmp[] = "/tmp/nspawn-pivot-XXXXXX"; | |
999 | bool remove_pivot_tmp = false; | |
1000 | int r; | |
1001 | ||
1002 | assert(directory); | |
1003 | ||
1004 | if (!pivot_root_new) | |
1005 | return 0; | |
1006 | ||
1007 | /* Pivot pivot_root_new to / and the existing / to pivot_root_old. | |
1008 | * If pivot_root_old is NULL, the existing / disappears. | |
1009 | * This requires a temporary directory, pivot_tmp, which is | |
1010 | * not a child of either. | |
1011 | * | |
1012 | * This is typically used for OSTree-style containers, where | |
1013 | * the root partition contains several sysroots which could be | |
1014 | * run. Normally, one would be chosen by the bootloader and | |
1015 | * pivoted to / by initramfs. | |
1016 | * | |
1017 | * For example, for an OSTree deployment, pivot_root_new | |
1018 | * would be: /ostree/deploy/$os/deploy/$checksum. Note that this | |
1019 | * code doesn’t do the /var mount which OSTree expects: use | |
1020 | * --bind +/sysroot/ostree/deploy/$os/var:/var for that. | |
1021 | * | |
1022 | * So in the OSTree case, we’ll end up with something like: | |
1023 | * - directory = /tmp/nspawn-root-123456 | |
1024 | * - pivot_root_new = /ostree/deploy/os/deploy/123abc | |
1025 | * - pivot_root_old = /sysroot | |
1026 | * - directory_pivot_root_new = | |
1027 | * /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc | |
1028 | * - pivot_tmp = /tmp/nspawn-pivot-123456 | |
1029 | * - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot | |
1030 | * | |
1031 | * Requires all file systems at directory and below to be mounted | |
1032 | * MS_PRIVATE or MS_SLAVE so they can be moved. | |
1033 | */ | |
1034 | directory_pivot_root_new = prefix_root(directory, pivot_root_new); | |
1035 | ||
1036 | /* Remount directory_pivot_root_new to make it movable. */ | |
1037 | r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory_pivot_root_new, NULL, MS_BIND, NULL); | |
1038 | if (r < 0) | |
1039 | goto done; | |
1040 | ||
1041 | if (pivot_root_old) { | |
1042 | if (!mkdtemp(pivot_tmp)) { | |
1043 | r = log_error_errno(errno, "Failed to create temporary directory: %m"); | |
1044 | goto done; | |
1045 | } | |
1046 | ||
1047 | remove_pivot_tmp = true; | |
1048 | pivot_tmp_pivot_root_old = prefix_root(pivot_tmp, pivot_root_old); | |
1049 | ||
1050 | r = mount_verbose(LOG_ERR, directory_pivot_root_new, pivot_tmp, NULL, MS_MOVE, NULL); | |
1051 | if (r < 0) | |
1052 | goto done; | |
1053 | ||
1054 | r = mount_verbose(LOG_ERR, directory, pivot_tmp_pivot_root_old, NULL, MS_MOVE, NULL); | |
1055 | if (r < 0) | |
1056 | goto done; | |
1057 | ||
1058 | r = mount_verbose(LOG_ERR, pivot_tmp, directory, NULL, MS_MOVE, NULL); | |
1059 | if (r < 0) | |
1060 | goto done; | |
1061 | } else { | |
1062 | r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory, NULL, MS_MOVE, NULL); | |
1063 | if (r < 0) | |
1064 | goto done; | |
1065 | } | |
1066 | ||
1067 | done: | |
1068 | if (remove_pivot_tmp) | |
1069 | (void) rmdir(pivot_tmp); | |
1070 | ||
1071 | return r; | |
1072 | } |