]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
34829a32 LP |
2 | |
3 | #include <sys/mount.h> | |
4 | ||
b5efdb8a | 5 | #include "alloc-util.h" |
3ffd4af2 | 6 | #include "fd-util.h" |
07630cea | 7 | #include "fileio.h" |
04029482 | 8 | #include "fs-util.h" |
07630cea | 9 | #include "mkdir.h" |
60e76d48 | 10 | #include "mount-util.h" |
049af8ad | 11 | #include "mountpoint-util.h" |
3ffd4af2 | 12 | #include "nspawn-cgroup.h" |
04029482 LS |
13 | #include "nspawn-mount.h" |
14 | #include "path-util.h" | |
f0bef277 | 15 | #include "rm-rf.h" |
07630cea LP |
16 | #include "string-util.h" |
17 | #include "strv.h" | |
04029482 | 18 | #include "user-util.h" |
07630cea | 19 | #include "util.h" |
34829a32 | 20 | |
f0bef277 | 21 | static int chown_cgroup_path(const char *path, uid_t uid_shift) { |
34829a32 LP |
22 | _cleanup_close_ int fd = -1; |
23 | const char *fn; | |
34829a32 | 24 | |
f0bef277 | 25 | fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY); |
34829a32 | 26 | if (fd < 0) |
f0bef277 | 27 | return -errno; |
34829a32 LP |
28 | |
29 | FOREACH_STRING(fn, | |
30 | ".", | |
34829a32 LP |
31 | "cgroup.clone_children", |
32 | "cgroup.controllers", | |
1cfdbe29 LP |
33 | "cgroup.events", |
34 | "cgroup.procs", | |
35 | "cgroup.stat", | |
36 | "cgroup.subtree_control", | |
37 | "cgroup.threads", | |
38 | "notify_on_release", | |
39 | "tasks") | |
34829a32 LP |
40 | if (fchownat(fd, fn, uid_shift, uid_shift, 0) < 0) |
41 | log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno, | |
bd68e99b | 42 | "Failed to chown \"%s/%s\", ignoring: %m", path, fn); |
34829a32 LP |
43 | |
44 | return 0; | |
45 | } | |
46 | ||
de54e02d | 47 | int chown_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift) { |
f0bef277 | 48 | _cleanup_free_ char *path = NULL, *fs = NULL; |
f0bef277 EV |
49 | int r; |
50 | ||
51 | r = cg_pid_get_path(NULL, pid, &path); | |
52 | if (r < 0) | |
53 | return log_error_errno(r, "Failed to get container cgroup path: %m"); | |
54 | ||
55 | r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs); | |
56 | if (r < 0) | |
57 | return log_error_errno(r, "Failed to get file system path for container cgroup: %m"); | |
58 | ||
59 | r = chown_cgroup_path(fs, uid_shift); | |
60 | if (r < 0) | |
61 | return log_error_errno(r, "Failed to chown() cgroup %s: %m", fs); | |
62 | ||
89f18020 | 63 | if (unified_requested == CGROUP_UNIFIED_SYSTEMD || (unified_requested == CGROUP_UNIFIED_NONE && cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0)) { |
de54e02d LP |
64 | _cleanup_free_ char *lfs = NULL; |
65 | /* Always propagate access rights from unified to legacy controller */ | |
66 | ||
67 | r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, NULL, &lfs); | |
68 | if (r < 0) | |
69 | return log_error_errno(r, "Failed to get file system path for container cgroup: %m"); | |
70 | ||
71 | r = chown_cgroup_path(lfs, uid_shift); | |
72 | if (r < 0) | |
73 | return log_error_errno(r, "Failed to chown() cgroup %s: %m", lfs); | |
74 | } | |
75 | ||
f0bef277 EV |
76 | return 0; |
77 | } | |
78 | ||
93dbdf6c | 79 | int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift) { |
34829a32 LP |
80 | _cleanup_free_ char *cgroup = NULL; |
81 | char tree[] = "/tmp/unifiedXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1]; | |
82 | bool undo_mount = false; | |
83 | const char *fn; | |
b4cccbc1 | 84 | int r, unified_controller; |
34829a32 | 85 | |
c22800e4 | 86 | unified_controller = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER); |
b4cccbc1 LP |
87 | if (unified_controller < 0) |
88 | return log_error_errno(unified_controller, "Failed to determine whether the systemd hierarchy is unified: %m"); | |
89 | if ((unified_controller > 0) == (unified_requested >= CGROUP_UNIFIED_SYSTEMD)) | |
34829a32 LP |
90 | return 0; |
91 | ||
92 | /* When the host uses the legacy cgroup setup, but the | |
93 | * container shall use the unified hierarchy, let's make sure | |
94 | * we copy the path from the name=systemd hierarchy into the | |
95 | * unified hierarchy. Similar for the reverse situation. */ | |
96 | ||
97 | r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); | |
98 | if (r < 0) | |
99 | return log_error_errno(r, "Failed to get control group of " PID_FMT ": %m", pid); | |
100 | ||
101 | /* In order to access the unified hierarchy we need to mount it */ | |
102 | if (!mkdtemp(tree)) | |
103 | return log_error_errno(errno, "Failed to generate temporary mount point for unified hierarchy: %m"); | |
104 | ||
b4cccbc1 | 105 | if (unified_controller > 0) |
60e76d48 ZJS |
106 | r = mount_verbose(LOG_ERR, "cgroup", tree, "cgroup", |
107 | MS_NOSUID|MS_NOEXEC|MS_NODEV, "none,name=systemd,xattr"); | |
34829a32 | 108 | else |
60e76d48 ZJS |
109 | r = mount_verbose(LOG_ERR, "cgroup", tree, "cgroup2", |
110 | MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL); | |
111 | if (r < 0) | |
34829a32 | 112 | goto finish; |
34829a32 LP |
113 | |
114 | undo_mount = true; | |
115 | ||
f0bef277 EV |
116 | /* If nspawn dies abruptly the cgroup hierarchy created below |
117 | * its unit isn't cleaned up. So, let's remove it | |
118 | * https://github.com/systemd/systemd/pull/4223#issuecomment-252519810 */ | |
119 | fn = strjoina(tree, cgroup); | |
120 | (void) rm_rf(fn, REMOVE_ROOT|REMOVE_ONLY_DIRECTORIES); | |
121 | ||
34829a32 LP |
122 | fn = strjoina(tree, cgroup, "/cgroup.procs"); |
123 | (void) mkdir_parents(fn, 0755); | |
124 | ||
125 | sprintf(pid_string, PID_FMT, pid); | |
57512c89 | 126 | r = write_string_file(fn, pid_string, WRITE_STRING_FILE_DISABLE_BUFFER); |
f0bef277 | 127 | if (r < 0) { |
34829a32 | 128 | log_error_errno(r, "Failed to move process: %m"); |
f0bef277 EV |
129 | goto finish; |
130 | } | |
34829a32 | 131 | |
f0bef277 | 132 | fn = strjoina(tree, cgroup); |
93dbdf6c | 133 | r = chown_cgroup_path(fn, uid_shift); |
f0bef277 EV |
134 | if (r < 0) |
135 | log_error_errno(r, "Failed to chown() cgroup %s: %m", fn); | |
34829a32 LP |
136 | finish: |
137 | if (undo_mount) | |
60e76d48 | 138 | (void) umount_verbose(tree); |
34829a32 LP |
139 | |
140 | (void) rmdir(tree); | |
141 | return r; | |
142 | } | |
143 | ||
720f0a2f | 144 | int create_subcgroup(pid_t pid, bool keep_unit, CGroupUnified unified_requested) { |
34829a32 | 145 | _cleanup_free_ char *cgroup = NULL; |
34829a32 | 146 | CGroupMask supported; |
720f0a2f LP |
147 | const char *payload; |
148 | int r; | |
34829a32 | 149 | |
720f0a2f LP |
150 | assert(pid > 1); |
151 | ||
152 | /* In the unified hierarchy inner nodes may only contain subgroups, but not processes. Hence, if we running in | |
153 | * the unified hierarchy and the container does the same, and we did not create a scope unit for the container | |
154 | * move us and the container into two separate subcgroups. | |
155 | * | |
156 | * Moreover, container payloads such as systemd try to manage the cgroup they run in in full (i.e. including | |
157 | * its attributes), while the host systemd will only delegate cgroups for children of the cgroup created for a | |
158 | * delegation unit, instead of the cgroup itself. This means, if we'd pass on the cgroup allocated from the | |
159 | * host systemd directly to the payload, the host and payload systemd might fight for the cgroup | |
160 | * attributes. Hence, let's insert an intermediary cgroup to cover that case too. | |
161 | * | |
162 | * Note that we only bother with the main hierarchy here, not with any secondary ones. On the unified setup | |
163 | * that's fine because there's only one hiearchy anyway and controllers are enabled directly on it. On the | |
164 | * legacy setup, this is fine too, since delegation of controllers is generally not safe there, hence we won't | |
165 | * do it. */ | |
34829a32 LP |
166 | |
167 | r = cg_mask_supported(&supported); | |
168 | if (r < 0) | |
169 | return log_error_errno(r, "Failed to determine supported controllers: %m"); | |
170 | ||
720f0a2f LP |
171 | if (keep_unit) |
172 | r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup); | |
173 | else | |
174 | r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); | |
34829a32 LP |
175 | if (r < 0) |
176 | return log_error_errno(r, "Failed to get our control group: %m"); | |
177 | ||
720f0a2f LP |
178 | payload = strjoina(cgroup, "/payload"); |
179 | r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, payload, pid); | |
34829a32 | 180 | if (r < 0) |
720f0a2f | 181 | return log_error_errno(r, "Failed to create %s subcgroup: %m", payload); |
34829a32 | 182 | |
720f0a2f LP |
183 | if (keep_unit) { |
184 | const char *supervisor; | |
185 | ||
186 | supervisor = strjoina(cgroup, "/supervisor"); | |
187 | r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, supervisor, 0); | |
188 | if (r < 0) | |
189 | return log_error_errno(r, "Failed to create %s subcgroup: %m", supervisor); | |
190 | } | |
34829a32 LP |
191 | |
192 | /* Try to enable as many controllers as possible for the new payload. */ | |
27adcc97 | 193 | (void) cg_enable_everywhere(supported, supported, cgroup, NULL); |
34829a32 LP |
194 | return 0; |
195 | } | |
04029482 LS |
196 | |
197 | /* Retrieve existing subsystems. This function is called in a new cgroup | |
198 | * namespace. | |
199 | */ | |
200 | static int get_process_controllers(Set **ret) { | |
201 | _cleanup_set_free_free_ Set *controllers = NULL; | |
202 | _cleanup_fclose_ FILE *f = NULL; | |
203 | int r; | |
204 | ||
205 | assert(ret); | |
206 | ||
207 | controllers = set_new(&string_hash_ops); | |
208 | if (!controllers) | |
209 | return -ENOMEM; | |
210 | ||
211 | f = fopen("/proc/self/cgroup", "re"); | |
212 | if (!f) | |
213 | return errno == ENOENT ? -ESRCH : -errno; | |
214 | ||
215 | for (;;) { | |
216 | _cleanup_free_ char *line = NULL; | |
217 | char *e, *l; | |
218 | ||
219 | r = read_line(f, LONG_LINE_MAX, &line); | |
220 | if (r < 0) | |
221 | return r; | |
222 | if (r == 0) | |
223 | break; | |
224 | ||
225 | l = strchr(line, ':'); | |
226 | if (!l) | |
227 | continue; | |
228 | ||
229 | l++; | |
230 | e = strchr(l, ':'); | |
231 | if (!e) | |
232 | continue; | |
233 | ||
234 | *e = 0; | |
235 | ||
236 | if (STR_IN_SET(l, "", "name=systemd", "name=unified")) | |
237 | continue; | |
238 | ||
239 | r = set_put_strdup(controllers, l); | |
240 | if (r < 0) | |
241 | return r; | |
242 | } | |
243 | ||
244 | *ret = TAKE_PTR(controllers); | |
245 | ||
246 | return 0; | |
247 | } | |
248 | ||
249 | static int mount_legacy_cgroup_hierarchy( | |
250 | const char *dest, | |
251 | const char *controller, | |
252 | const char *hierarchy, | |
253 | bool read_only) { | |
254 | ||
255 | const char *to, *fstype, *opts; | |
256 | int r; | |
257 | ||
258 | to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy); | |
259 | ||
260 | r = path_is_mount_point(to, dest, 0); | |
261 | if (r < 0 && r != -ENOENT) | |
262 | return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to); | |
263 | if (r > 0) | |
264 | return 0; | |
265 | ||
266 | mkdir_p(to, 0755); | |
267 | ||
268 | /* The superblock mount options of the mount point need to be | |
269 | * identical to the hosts', and hence writable... */ | |
270 | if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_HYBRID)) { | |
271 | fstype = "cgroup2"; | |
272 | opts = NULL; | |
273 | } else if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_LEGACY)) { | |
274 | fstype = "cgroup"; | |
275 | opts = "none,name=systemd,xattr"; | |
276 | } else { | |
277 | fstype = "cgroup"; | |
278 | opts = controller; | |
279 | } | |
280 | ||
281 | r = mount_verbose(LOG_ERR, "cgroup", to, fstype, MS_NOSUID|MS_NOEXEC|MS_NODEV, opts); | |
282 | if (r < 0) | |
283 | return r; | |
284 | ||
285 | /* ... hence let's only make the bind mount read-only, not the superblock. */ | |
286 | if (read_only) { | |
287 | r = mount_verbose(LOG_ERR, NULL, to, NULL, | |
288 | MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL); | |
289 | if (r < 0) | |
290 | return r; | |
291 | } | |
292 | ||
293 | return 1; | |
294 | } | |
295 | ||
296 | /* Mount a legacy cgroup hierarchy when cgroup namespaces are supported. */ | |
297 | static int mount_legacy_cgns_supported( | |
298 | const char *dest, | |
299 | CGroupUnified unified_requested, | |
300 | bool userns, | |
301 | uid_t uid_shift, | |
302 | uid_t uid_range, | |
303 | const char *selinux_apifs_context) { | |
304 | ||
305 | _cleanup_set_free_free_ Set *controllers = NULL; | |
306 | const char *cgroup_root = "/sys/fs/cgroup", *c; | |
307 | int r; | |
308 | ||
309 | (void) mkdir_p(cgroup_root, 0755); | |
310 | ||
311 | /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */ | |
312 | r = path_is_mount_point(cgroup_root, dest, AT_SYMLINK_FOLLOW); | |
313 | if (r < 0) | |
314 | return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m"); | |
315 | if (r == 0) { | |
316 | _cleanup_free_ char *options = NULL; | |
317 | ||
318 | /* When cgroup namespaces are enabled and user namespaces are | |
319 | * used then the mount of the cgroupfs is done *inside* the new | |
320 | * user namespace. We're root in the new user namespace and the | |
321 | * kernel will happily translate our uid/gid to the correct | |
322 | * uid/gid as seen from e.g. /proc/1/mountinfo. So we simply | |
323 | * pass uid 0 and not uid_shift to tmpfs_patch_options(). | |
324 | */ | |
325 | r = tmpfs_patch_options("mode=755", 0, selinux_apifs_context, &options); | |
326 | if (r < 0) | |
327 | return log_oom(); | |
328 | ||
329 | r = mount_verbose(LOG_ERR, "tmpfs", cgroup_root, "tmpfs", | |
330 | MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options); | |
331 | if (r < 0) | |
332 | return r; | |
333 | } | |
334 | ||
335 | r = cg_all_unified(); | |
336 | if (r < 0) | |
337 | return r; | |
338 | if (r > 0) | |
339 | goto skip_controllers; | |
340 | ||
341 | r = get_process_controllers(&controllers); | |
342 | if (r < 0) | |
343 | return log_error_errno(r, "Failed to determine cgroup controllers: %m"); | |
344 | ||
345 | for (;;) { | |
346 | _cleanup_free_ const char *controller = NULL; | |
347 | ||
348 | controller = set_steal_first(controllers); | |
349 | if (!controller) | |
350 | break; | |
351 | ||
352 | r = mount_legacy_cgroup_hierarchy("", controller, controller, !userns); | |
353 | if (r < 0) | |
354 | return r; | |
355 | ||
356 | /* When multiple hierarchies are co-mounted, make their | |
357 | * constituting individual hierarchies a symlink to the | |
358 | * co-mount. | |
359 | */ | |
360 | c = controller; | |
361 | for (;;) { | |
362 | _cleanup_free_ char *target = NULL, *tok = NULL; | |
363 | ||
364 | r = extract_first_word(&c, &tok, ",", 0); | |
365 | if (r < 0) | |
366 | return log_error_errno(r, "Failed to extract co-mounted cgroup controller: %m"); | |
367 | if (r == 0) | |
368 | break; | |
369 | ||
370 | if (streq(controller, tok)) | |
371 | break; | |
372 | ||
373 | target = prefix_root("/sys/fs/cgroup/", tok); | |
374 | if (!target) | |
375 | return log_oom(); | |
376 | ||
6c9c51e5 | 377 | r = symlink_idempotent(controller, target, false); |
04029482 LS |
378 | if (r == -EINVAL) |
379 | return log_error_errno(r, "Invalid existing symlink for combined hierarchy: %m"); | |
380 | if (r < 0) | |
381 | return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m"); | |
382 | } | |
383 | } | |
384 | ||
385 | skip_controllers: | |
386 | if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { | |
387 | r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false); | |
388 | if (r < 0) | |
389 | return r; | |
390 | } | |
391 | ||
392 | r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false); | |
393 | if (r < 0) | |
394 | return r; | |
395 | ||
396 | if (!userns) | |
397 | return mount_verbose(LOG_ERR, NULL, cgroup_root, NULL, | |
398 | MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755"); | |
399 | ||
400 | return 0; | |
401 | } | |
402 | ||
403 | /* Mount legacy cgroup hierarchy when cgroup namespaces are unsupported. */ | |
404 | static int mount_legacy_cgns_unsupported( | |
405 | const char *dest, | |
406 | CGroupUnified unified_requested, | |
407 | bool userns, | |
408 | uid_t uid_shift, | |
409 | uid_t uid_range, | |
410 | const char *selinux_apifs_context) { | |
411 | ||
412 | _cleanup_set_free_free_ Set *controllers = NULL; | |
413 | const char *cgroup_root; | |
414 | int r; | |
415 | ||
416 | cgroup_root = prefix_roota(dest, "/sys/fs/cgroup"); | |
417 | ||
418 | (void) mkdir_p(cgroup_root, 0755); | |
419 | ||
420 | /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */ | |
421 | r = path_is_mount_point(cgroup_root, dest, AT_SYMLINK_FOLLOW); | |
422 | if (r < 0) | |
423 | return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m"); | |
424 | if (r == 0) { | |
425 | _cleanup_free_ char *options = NULL; | |
426 | ||
427 | r = tmpfs_patch_options("mode=755", uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &options); | |
428 | if (r < 0) | |
429 | return log_oom(); | |
430 | ||
431 | r = mount_verbose(LOG_ERR, "tmpfs", cgroup_root, "tmpfs", | |
432 | MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options); | |
433 | if (r < 0) | |
434 | return r; | |
435 | } | |
436 | ||
437 | r = cg_all_unified(); | |
438 | if (r < 0) | |
439 | return r; | |
440 | if (r > 0) | |
441 | goto skip_controllers; | |
442 | ||
443 | r = cg_kernel_controllers(&controllers); | |
444 | if (r < 0) | |
445 | return log_error_errno(r, "Failed to determine cgroup controllers: %m"); | |
446 | ||
447 | for (;;) { | |
448 | _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL; | |
449 | ||
450 | controller = set_steal_first(controllers); | |
451 | if (!controller) | |
452 | break; | |
453 | ||
454 | origin = prefix_root("/sys/fs/cgroup/", controller); | |
455 | if (!origin) | |
456 | return log_oom(); | |
457 | ||
458 | r = readlink_malloc(origin, &combined); | |
459 | if (r == -EINVAL) { | |
460 | /* Not a symbolic link, but directly a single cgroup hierarchy */ | |
461 | ||
462 | r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true); | |
463 | if (r < 0) | |
464 | return r; | |
465 | ||
466 | } else if (r < 0) | |
467 | return log_error_errno(r, "Failed to read link %s: %m", origin); | |
468 | else { | |
469 | _cleanup_free_ char *target = NULL; | |
470 | ||
471 | target = prefix_root(dest, origin); | |
472 | if (!target) | |
473 | return log_oom(); | |
474 | ||
475 | /* A symbolic link, a combination of controllers in one hierarchy */ | |
476 | ||
477 | if (!filename_is_valid(combined)) { | |
478 | log_warning("Ignoring invalid combined hierarchy %s.", combined); | |
479 | continue; | |
480 | } | |
481 | ||
482 | r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true); | |
483 | if (r < 0) | |
484 | return r; | |
485 | ||
6c9c51e5 | 486 | r = symlink_idempotent(combined, target, false); |
04029482 LS |
487 | if (r == -EINVAL) |
488 | return log_error_errno(r, "Invalid existing symlink for combined hierarchy: %m"); | |
489 | if (r < 0) | |
490 | return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m"); | |
491 | } | |
492 | } | |
493 | ||
494 | skip_controllers: | |
495 | if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { | |
496 | r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false); | |
497 | if (r < 0) | |
498 | return r; | |
499 | } | |
500 | ||
501 | r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false); | |
502 | if (r < 0) | |
503 | return r; | |
504 | ||
505 | return mount_verbose(LOG_ERR, NULL, cgroup_root, NULL, | |
506 | MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755"); | |
507 | } | |
508 | ||
509 | static int mount_unified_cgroups(const char *dest) { | |
510 | const char *p; | |
511 | int r; | |
512 | ||
513 | assert(dest); | |
514 | ||
515 | p = prefix_roota(dest, "/sys/fs/cgroup"); | |
516 | ||
517 | (void) mkdir_p(p, 0755); | |
518 | ||
519 | r = path_is_mount_point(p, dest, AT_SYMLINK_FOLLOW); | |
520 | if (r < 0) | |
521 | return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p); | |
522 | if (r > 0) { | |
523 | p = prefix_roota(dest, "/sys/fs/cgroup/cgroup.procs"); | |
524 | if (access(p, F_OK) >= 0) | |
525 | return 0; | |
526 | if (errno != ENOENT) | |
527 | return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p); | |
528 | ||
baaa35ad ZJS |
529 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), |
530 | "%s is already mounted but not a unified cgroup hierarchy. Refusing.", p); | |
04029482 LS |
531 | } |
532 | ||
533 | return mount_verbose(LOG_ERR, "cgroup", p, "cgroup2", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL); | |
534 | } | |
535 | ||
536 | int mount_cgroups( | |
537 | const char *dest, | |
538 | CGroupUnified unified_requested, | |
539 | bool userns, | |
540 | uid_t uid_shift, | |
541 | uid_t uid_range, | |
542 | const char *selinux_apifs_context, | |
543 | bool use_cgns) { | |
544 | ||
545 | if (unified_requested >= CGROUP_UNIFIED_ALL) | |
546 | return mount_unified_cgroups(dest); | |
547 | if (use_cgns) | |
548 | return mount_legacy_cgns_supported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context); | |
549 | ||
550 | return mount_legacy_cgns_unsupported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context); | |
551 | } | |
552 | ||
553 | static int mount_systemd_cgroup_writable_one(const char *root, const char *own) { | |
554 | int r; | |
555 | ||
556 | assert(root); | |
557 | assert(own); | |
558 | ||
559 | /* Make our own cgroup a (writable) bind mount */ | |
560 | r = mount_verbose(LOG_ERR, own, own, NULL, MS_BIND, NULL); | |
561 | if (r < 0) | |
562 | return r; | |
563 | ||
564 | /* And then remount the systemd cgroup root read-only */ | |
565 | return mount_verbose(LOG_ERR, NULL, root, NULL, | |
566 | MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL); | |
567 | } | |
568 | ||
569 | int mount_systemd_cgroup_writable( | |
570 | const char *dest, | |
571 | CGroupUnified unified_requested) { | |
572 | ||
573 | _cleanup_free_ char *own_cgroup_path = NULL; | |
574 | const char *root, *own; | |
575 | int r; | |
576 | ||
577 | assert(dest); | |
578 | ||
579 | r = cg_pid_get_path(NULL, 0, &own_cgroup_path); | |
580 | if (r < 0) | |
581 | return log_error_errno(r, "Failed to determine our own cgroup path: %m"); | |
582 | ||
583 | /* If we are living in the top-level, then there's nothing to do... */ | |
584 | if (path_equal(own_cgroup_path, "/")) | |
585 | return 0; | |
586 | ||
587 | if (unified_requested >= CGROUP_UNIFIED_ALL) { | |
588 | ||
589 | root = prefix_roota(dest, "/sys/fs/cgroup"); | |
590 | own = strjoina(root, own_cgroup_path); | |
591 | ||
592 | } else { | |
593 | ||
594 | if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { | |
595 | root = prefix_roota(dest, "/sys/fs/cgroup/unified"); | |
596 | own = strjoina(root, own_cgroup_path); | |
597 | ||
598 | r = mount_systemd_cgroup_writable_one(root, own); | |
599 | if (r < 0) | |
600 | return r; | |
601 | } | |
602 | ||
603 | root = prefix_roota(dest, "/sys/fs/cgroup/systemd"); | |
604 | own = strjoina(root, own_cgroup_path); | |
605 | } | |
606 | ||
607 | return mount_systemd_cgroup_writable_one(root, own); | |
608 | } |