]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
34829a32 LP |
2 | |
3 | #include <sys/mount.h> | |
4 | ||
b5efdb8a | 5 | #include "alloc-util.h" |
fdb3deca | 6 | #include "cgroup-setup.h" |
3ffd4af2 | 7 | #include "fd-util.h" |
07630cea | 8 | #include "fileio.h" |
ca78ad1d | 9 | #include "format-util.h" |
04029482 | 10 | #include "fs-util.h" |
07630cea | 11 | #include "mkdir.h" |
60e76d48 | 12 | #include "mount-util.h" |
049af8ad | 13 | #include "mountpoint-util.h" |
3ffd4af2 | 14 | #include "nspawn-cgroup.h" |
04029482 LS |
15 | #include "nspawn-mount.h" |
16 | #include "path-util.h" | |
f0bef277 | 17 | #include "rm-rf.h" |
07630cea LP |
18 | #include "string-util.h" |
19 | #include "strv.h" | |
04029482 | 20 | #include "user-util.h" |
34829a32 | 21 | |
f0bef277 | 22 | static int chown_cgroup_path(const char *path, uid_t uid_shift) { |
34829a32 | 23 | _cleanup_close_ int fd = -1; |
34829a32 | 24 | |
f0bef277 | 25 | fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY); |
34829a32 | 26 | if (fd < 0) |
f0bef277 | 27 | return -errno; |
34829a32 LP |
28 | |
29 | FOREACH_STRING(fn, | |
30 | ".", | |
34829a32 LP |
31 | "cgroup.clone_children", |
32 | "cgroup.controllers", | |
1cfdbe29 LP |
33 | "cgroup.events", |
34 | "cgroup.procs", | |
35 | "cgroup.stat", | |
36 | "cgroup.subtree_control", | |
37 | "cgroup.threads", | |
38 | "notify_on_release", | |
39 | "tasks") | |
34829a32 LP |
40 | if (fchownat(fd, fn, uid_shift, uid_shift, 0) < 0) |
41 | log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno, | |
bd68e99b | 42 | "Failed to chown \"%s/%s\", ignoring: %m", path, fn); |
34829a32 LP |
43 | |
44 | return 0; | |
45 | } | |
46 | ||
de54e02d | 47 | int chown_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift) { |
f0bef277 | 48 | _cleanup_free_ char *path = NULL, *fs = NULL; |
f0bef277 EV |
49 | int r; |
50 | ||
51 | r = cg_pid_get_path(NULL, pid, &path); | |
52 | if (r < 0) | |
53 | return log_error_errno(r, "Failed to get container cgroup path: %m"); | |
54 | ||
55 | r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs); | |
56 | if (r < 0) | |
57 | return log_error_errno(r, "Failed to get file system path for container cgroup: %m"); | |
58 | ||
59 | r = chown_cgroup_path(fs, uid_shift); | |
60 | if (r < 0) | |
61 | return log_error_errno(r, "Failed to chown() cgroup %s: %m", fs); | |
62 | ||
89f18020 | 63 | if (unified_requested == CGROUP_UNIFIED_SYSTEMD || (unified_requested == CGROUP_UNIFIED_NONE && cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0)) { |
de54e02d LP |
64 | _cleanup_free_ char *lfs = NULL; |
65 | /* Always propagate access rights from unified to legacy controller */ | |
66 | ||
67 | r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, NULL, &lfs); | |
68 | if (r < 0) | |
69 | return log_error_errno(r, "Failed to get file system path for container cgroup: %m"); | |
70 | ||
71 | r = chown_cgroup_path(lfs, uid_shift); | |
72 | if (r < 0) | |
73 | return log_error_errno(r, "Failed to chown() cgroup %s: %m", lfs); | |
74 | } | |
75 | ||
f0bef277 EV |
76 | return 0; |
77 | } | |
78 | ||
93dbdf6c | 79 | int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift) { |
34829a32 LP |
80 | _cleanup_free_ char *cgroup = NULL; |
81 | char tree[] = "/tmp/unifiedXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1]; | |
82 | bool undo_mount = false; | |
83 | const char *fn; | |
b4cccbc1 | 84 | int r, unified_controller; |
34829a32 | 85 | |
c22800e4 | 86 | unified_controller = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER); |
b4cccbc1 LP |
87 | if (unified_controller < 0) |
88 | return log_error_errno(unified_controller, "Failed to determine whether the systemd hierarchy is unified: %m"); | |
89 | if ((unified_controller > 0) == (unified_requested >= CGROUP_UNIFIED_SYSTEMD)) | |
34829a32 LP |
90 | return 0; |
91 | ||
92 | /* When the host uses the legacy cgroup setup, but the | |
93 | * container shall use the unified hierarchy, let's make sure | |
94 | * we copy the path from the name=systemd hierarchy into the | |
95 | * unified hierarchy. Similar for the reverse situation. */ | |
96 | ||
97 | r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); | |
98 | if (r < 0) | |
99 | return log_error_errno(r, "Failed to get control group of " PID_FMT ": %m", pid); | |
100 | ||
101 | /* In order to access the unified hierarchy we need to mount it */ | |
102 | if (!mkdtemp(tree)) | |
103 | return log_error_errno(errno, "Failed to generate temporary mount point for unified hierarchy: %m"); | |
104 | ||
b4cccbc1 | 105 | if (unified_controller > 0) |
511a8cfe LP |
106 | r = mount_nofollow_verbose(LOG_ERR, "cgroup", tree, "cgroup", |
107 | MS_NOSUID|MS_NOEXEC|MS_NODEV, "none,name=systemd,xattr"); | |
34829a32 | 108 | else |
511a8cfe LP |
109 | r = mount_nofollow_verbose(LOG_ERR, "cgroup", tree, "cgroup2", |
110 | MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL); | |
60e76d48 | 111 | if (r < 0) |
34829a32 | 112 | goto finish; |
34829a32 LP |
113 | |
114 | undo_mount = true; | |
115 | ||
f0bef277 EV |
116 | /* If nspawn dies abruptly the cgroup hierarchy created below |
117 | * its unit isn't cleaned up. So, let's remove it | |
118 | * https://github.com/systemd/systemd/pull/4223#issuecomment-252519810 */ | |
119 | fn = strjoina(tree, cgroup); | |
120 | (void) rm_rf(fn, REMOVE_ROOT|REMOVE_ONLY_DIRECTORIES); | |
121 | ||
34829a32 | 122 | fn = strjoina(tree, cgroup, "/cgroup.procs"); |
34829a32 LP |
123 | |
124 | sprintf(pid_string, PID_FMT, pid); | |
e82e549f | 125 | r = write_string_file(fn, pid_string, WRITE_STRING_FILE_DISABLE_BUFFER|WRITE_STRING_FILE_MKDIR_0755); |
f0bef277 | 126 | if (r < 0) { |
34829a32 | 127 | log_error_errno(r, "Failed to move process: %m"); |
f0bef277 EV |
128 | goto finish; |
129 | } | |
34829a32 | 130 | |
f0bef277 | 131 | fn = strjoina(tree, cgroup); |
93dbdf6c | 132 | r = chown_cgroup_path(fn, uid_shift); |
f0bef277 EV |
133 | if (r < 0) |
134 | log_error_errno(r, "Failed to chown() cgroup %s: %m", fn); | |
34829a32 LP |
135 | finish: |
136 | if (undo_mount) | |
30f5d104 | 137 | (void) umount_verbose(LOG_ERR, tree, UMOUNT_NOFOLLOW); |
34829a32 LP |
138 | |
139 | (void) rmdir(tree); | |
140 | return r; | |
141 | } | |
142 | ||
720f0a2f | 143 | int create_subcgroup(pid_t pid, bool keep_unit, CGroupUnified unified_requested) { |
34829a32 | 144 | _cleanup_free_ char *cgroup = NULL; |
34829a32 | 145 | CGroupMask supported; |
720f0a2f LP |
146 | const char *payload; |
147 | int r; | |
34829a32 | 148 | |
720f0a2f LP |
149 | assert(pid > 1); |
150 | ||
151 | /* In the unified hierarchy inner nodes may only contain subgroups, but not processes. Hence, if we running in | |
152 | * the unified hierarchy and the container does the same, and we did not create a scope unit for the container | |
153 | * move us and the container into two separate subcgroups. | |
154 | * | |
ad337e55 | 155 | * Moreover, container payloads such as systemd try to manage the cgroup they run in full (i.e. including |
720f0a2f LP |
156 | * its attributes), while the host systemd will only delegate cgroups for children of the cgroup created for a |
157 | * delegation unit, instead of the cgroup itself. This means, if we'd pass on the cgroup allocated from the | |
158 | * host systemd directly to the payload, the host and payload systemd might fight for the cgroup | |
159 | * attributes. Hence, let's insert an intermediary cgroup to cover that case too. | |
160 | * | |
161 | * Note that we only bother with the main hierarchy here, not with any secondary ones. On the unified setup | |
5238e957 | 162 | * that's fine because there's only one hierarchy anyway and controllers are enabled directly on it. On the |
720f0a2f LP |
163 | * legacy setup, this is fine too, since delegation of controllers is generally not safe there, hence we won't |
164 | * do it. */ | |
34829a32 LP |
165 | |
166 | r = cg_mask_supported(&supported); | |
167 | if (r < 0) | |
168 | return log_error_errno(r, "Failed to determine supported controllers: %m"); | |
169 | ||
720f0a2f LP |
170 | if (keep_unit) |
171 | r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup); | |
172 | else | |
173 | r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); | |
34829a32 LP |
174 | if (r < 0) |
175 | return log_error_errno(r, "Failed to get our control group: %m"); | |
176 | ||
720f0a2f LP |
177 | payload = strjoina(cgroup, "/payload"); |
178 | r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, payload, pid); | |
34829a32 | 179 | if (r < 0) |
720f0a2f | 180 | return log_error_errno(r, "Failed to create %s subcgroup: %m", payload); |
34829a32 | 181 | |
720f0a2f LP |
182 | if (keep_unit) { |
183 | const char *supervisor; | |
184 | ||
185 | supervisor = strjoina(cgroup, "/supervisor"); | |
186 | r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, supervisor, 0); | |
187 | if (r < 0) | |
188 | return log_error_errno(r, "Failed to create %s subcgroup: %m", supervisor); | |
189 | } | |
34829a32 LP |
190 | |
191 | /* Try to enable as many controllers as possible for the new payload. */ | |
27adcc97 | 192 | (void) cg_enable_everywhere(supported, supported, cgroup, NULL); |
34829a32 LP |
193 | return 0; |
194 | } | |
04029482 LS |
195 | |
196 | /* Retrieve existing subsystems. This function is called in a new cgroup | |
197 | * namespace. | |
198 | */ | |
199 | static int get_process_controllers(Set **ret) { | |
be327321 | 200 | _cleanup_set_free_ Set *controllers = NULL; |
04029482 LS |
201 | _cleanup_fclose_ FILE *f = NULL; |
202 | int r; | |
203 | ||
204 | assert(ret); | |
205 | ||
04029482 LS |
206 | f = fopen("/proc/self/cgroup", "re"); |
207 | if (!f) | |
208 | return errno == ENOENT ? -ESRCH : -errno; | |
209 | ||
210 | for (;;) { | |
211 | _cleanup_free_ char *line = NULL; | |
212 | char *e, *l; | |
213 | ||
214 | r = read_line(f, LONG_LINE_MAX, &line); | |
215 | if (r < 0) | |
216 | return r; | |
217 | if (r == 0) | |
218 | break; | |
219 | ||
220 | l = strchr(line, ':'); | |
221 | if (!l) | |
222 | continue; | |
223 | ||
224 | l++; | |
225 | e = strchr(l, ':'); | |
226 | if (!e) | |
227 | continue; | |
228 | ||
229 | *e = 0; | |
230 | ||
231 | if (STR_IN_SET(l, "", "name=systemd", "name=unified")) | |
232 | continue; | |
233 | ||
be327321 | 234 | r = set_put_strdup(&controllers, l); |
04029482 LS |
235 | if (r < 0) |
236 | return r; | |
237 | } | |
238 | ||
239 | *ret = TAKE_PTR(controllers); | |
240 | ||
241 | return 0; | |
242 | } | |
243 | ||
244 | static int mount_legacy_cgroup_hierarchy( | |
245 | const char *dest, | |
246 | const char *controller, | |
247 | const char *hierarchy, | |
248 | bool read_only) { | |
249 | ||
250 | const char *to, *fstype, *opts; | |
251 | int r; | |
252 | ||
253 | to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy); | |
254 | ||
255 | r = path_is_mount_point(to, dest, 0); | |
256 | if (r < 0 && r != -ENOENT) | |
257 | return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to); | |
258 | if (r > 0) | |
259 | return 0; | |
260 | ||
5ef4cb7a | 261 | (void) mkdir_p(to, 0755); |
04029482 LS |
262 | |
263 | /* The superblock mount options of the mount point need to be | |
264 | * identical to the hosts', and hence writable... */ | |
265 | if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_HYBRID)) { | |
266 | fstype = "cgroup2"; | |
267 | opts = NULL; | |
268 | } else if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_LEGACY)) { | |
269 | fstype = "cgroup"; | |
270 | opts = "none,name=systemd,xattr"; | |
271 | } else { | |
272 | fstype = "cgroup"; | |
273 | opts = controller; | |
274 | } | |
275 | ||
511a8cfe | 276 | r = mount_nofollow_verbose(LOG_ERR, "cgroup", to, fstype, MS_NOSUID|MS_NOEXEC|MS_NODEV, opts); |
04029482 LS |
277 | if (r < 0) |
278 | return r; | |
279 | ||
280 | /* ... hence let's only make the bind mount read-only, not the superblock. */ | |
281 | if (read_only) { | |
511a8cfe LP |
282 | r = mount_nofollow_verbose(LOG_ERR, NULL, to, NULL, |
283 | MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL); | |
04029482 LS |
284 | if (r < 0) |
285 | return r; | |
286 | } | |
287 | ||
288 | return 1; | |
289 | } | |
290 | ||
291 | /* Mount a legacy cgroup hierarchy when cgroup namespaces are supported. */ | |
292 | static int mount_legacy_cgns_supported( | |
293 | const char *dest, | |
294 | CGroupUnified unified_requested, | |
295 | bool userns, | |
296 | uid_t uid_shift, | |
297 | uid_t uid_range, | |
298 | const char *selinux_apifs_context) { | |
299 | ||
be327321 | 300 | _cleanup_set_free_ Set *controllers = NULL; |
04029482 LS |
301 | const char *cgroup_root = "/sys/fs/cgroup", *c; |
302 | int r; | |
303 | ||
304 | (void) mkdir_p(cgroup_root, 0755); | |
305 | ||
306 | /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */ | |
307 | r = path_is_mount_point(cgroup_root, dest, AT_SYMLINK_FOLLOW); | |
308 | if (r < 0) | |
309 | return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m"); | |
310 | if (r == 0) { | |
311 | _cleanup_free_ char *options = NULL; | |
312 | ||
313 | /* When cgroup namespaces are enabled and user namespaces are | |
314 | * used then the mount of the cgroupfs is done *inside* the new | |
315 | * user namespace. We're root in the new user namespace and the | |
316 | * kernel will happily translate our uid/gid to the correct | |
317 | * uid/gid as seen from e.g. /proc/1/mountinfo. So we simply | |
318 | * pass uid 0 and not uid_shift to tmpfs_patch_options(). | |
319 | */ | |
7d85383e | 320 | r = tmpfs_patch_options("mode=755" TMPFS_LIMITS_SYS_FS_CGROUP, 0, selinux_apifs_context, &options); |
04029482 LS |
321 | if (r < 0) |
322 | return log_oom(); | |
323 | ||
511a8cfe LP |
324 | r = mount_nofollow_verbose(LOG_ERR, "tmpfs", cgroup_root, "tmpfs", |
325 | MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options); | |
04029482 LS |
326 | if (r < 0) |
327 | return r; | |
328 | } | |
329 | ||
330 | r = cg_all_unified(); | |
331 | if (r < 0) | |
332 | return r; | |
333 | if (r > 0) | |
334 | goto skip_controllers; | |
335 | ||
336 | r = get_process_controllers(&controllers); | |
337 | if (r < 0) | |
338 | return log_error_errno(r, "Failed to determine cgroup controllers: %m"); | |
339 | ||
340 | for (;;) { | |
341 | _cleanup_free_ const char *controller = NULL; | |
342 | ||
343 | controller = set_steal_first(controllers); | |
344 | if (!controller) | |
345 | break; | |
346 | ||
347 | r = mount_legacy_cgroup_hierarchy("", controller, controller, !userns); | |
348 | if (r < 0) | |
349 | return r; | |
350 | ||
351 | /* When multiple hierarchies are co-mounted, make their | |
352 | * constituting individual hierarchies a symlink to the | |
353 | * co-mount. | |
354 | */ | |
355 | c = controller; | |
356 | for (;;) { | |
357 | _cleanup_free_ char *target = NULL, *tok = NULL; | |
358 | ||
359 | r = extract_first_word(&c, &tok, ",", 0); | |
360 | if (r < 0) | |
361 | return log_error_errno(r, "Failed to extract co-mounted cgroup controller: %m"); | |
362 | if (r == 0) | |
363 | break; | |
364 | ||
365 | if (streq(controller, tok)) | |
366 | break; | |
367 | ||
c6134d3e | 368 | target = path_join("/sys/fs/cgroup/", tok); |
04029482 LS |
369 | if (!target) |
370 | return log_oom(); | |
371 | ||
6c9c51e5 | 372 | r = symlink_idempotent(controller, target, false); |
04029482 LS |
373 | if (r == -EINVAL) |
374 | return log_error_errno(r, "Invalid existing symlink for combined hierarchy: %m"); | |
375 | if (r < 0) | |
376 | return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m"); | |
377 | } | |
378 | } | |
379 | ||
380 | skip_controllers: | |
381 | if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { | |
382 | r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false); | |
383 | if (r < 0) | |
384 | return r; | |
385 | } | |
386 | ||
387 | r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false); | |
388 | if (r < 0) | |
389 | return r; | |
390 | ||
391 | if (!userns) | |
511a8cfe LP |
392 | return mount_nofollow_verbose(LOG_ERR, NULL, cgroup_root, NULL, |
393 | MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755"); | |
04029482 LS |
394 | |
395 | return 0; | |
396 | } | |
397 | ||
398 | /* Mount legacy cgroup hierarchy when cgroup namespaces are unsupported. */ | |
399 | static int mount_legacy_cgns_unsupported( | |
400 | const char *dest, | |
401 | CGroupUnified unified_requested, | |
402 | bool userns, | |
403 | uid_t uid_shift, | |
404 | uid_t uid_range, | |
405 | const char *selinux_apifs_context) { | |
406 | ||
594c3835 | 407 | _cleanup_set_free_ Set *controllers = NULL; |
04029482 LS |
408 | const char *cgroup_root; |
409 | int r; | |
410 | ||
411 | cgroup_root = prefix_roota(dest, "/sys/fs/cgroup"); | |
412 | ||
413 | (void) mkdir_p(cgroup_root, 0755); | |
414 | ||
415 | /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */ | |
416 | r = path_is_mount_point(cgroup_root, dest, AT_SYMLINK_FOLLOW); | |
417 | if (r < 0) | |
418 | return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m"); | |
419 | if (r == 0) { | |
420 | _cleanup_free_ char *options = NULL; | |
421 | ||
7d85383e | 422 | r = tmpfs_patch_options("mode=755" TMPFS_LIMITS_SYS_FS_CGROUP, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &options); |
04029482 LS |
423 | if (r < 0) |
424 | return log_oom(); | |
425 | ||
511a8cfe LP |
426 | r = mount_nofollow_verbose(LOG_ERR, "tmpfs", cgroup_root, "tmpfs", |
427 | MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options); | |
04029482 LS |
428 | if (r < 0) |
429 | return r; | |
430 | } | |
431 | ||
432 | r = cg_all_unified(); | |
433 | if (r < 0) | |
434 | return r; | |
435 | if (r > 0) | |
436 | goto skip_controllers; | |
437 | ||
438 | r = cg_kernel_controllers(&controllers); | |
439 | if (r < 0) | |
440 | return log_error_errno(r, "Failed to determine cgroup controllers: %m"); | |
441 | ||
442 | for (;;) { | |
443 | _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL; | |
444 | ||
445 | controller = set_steal_first(controllers); | |
446 | if (!controller) | |
447 | break; | |
448 | ||
c6134d3e | 449 | origin = path_join("/sys/fs/cgroup/", controller); |
04029482 LS |
450 | if (!origin) |
451 | return log_oom(); | |
452 | ||
453 | r = readlink_malloc(origin, &combined); | |
454 | if (r == -EINVAL) { | |
455 | /* Not a symbolic link, but directly a single cgroup hierarchy */ | |
456 | ||
457 | r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true); | |
458 | if (r < 0) | |
459 | return r; | |
460 | ||
461 | } else if (r < 0) | |
462 | return log_error_errno(r, "Failed to read link %s: %m", origin); | |
463 | else { | |
464 | _cleanup_free_ char *target = NULL; | |
465 | ||
c6134d3e | 466 | target = path_join(dest, origin); |
04029482 LS |
467 | if (!target) |
468 | return log_oom(); | |
469 | ||
470 | /* A symbolic link, a combination of controllers in one hierarchy */ | |
471 | ||
472 | if (!filename_is_valid(combined)) { | |
473 | log_warning("Ignoring invalid combined hierarchy %s.", combined); | |
474 | continue; | |
475 | } | |
476 | ||
477 | r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true); | |
478 | if (r < 0) | |
479 | return r; | |
480 | ||
6c9c51e5 | 481 | r = symlink_idempotent(combined, target, false); |
04029482 LS |
482 | if (r == -EINVAL) |
483 | return log_error_errno(r, "Invalid existing symlink for combined hierarchy: %m"); | |
484 | if (r < 0) | |
485 | return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m"); | |
486 | } | |
487 | } | |
488 | ||
489 | skip_controllers: | |
490 | if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { | |
491 | r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false); | |
492 | if (r < 0) | |
493 | return r; | |
494 | } | |
495 | ||
496 | r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false); | |
497 | if (r < 0) | |
498 | return r; | |
499 | ||
511a8cfe LP |
500 | return mount_nofollow_verbose(LOG_ERR, NULL, cgroup_root, NULL, |
501 | MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755"); | |
04029482 LS |
502 | } |
503 | ||
504 | static int mount_unified_cgroups(const char *dest) { | |
505 | const char *p; | |
506 | int r; | |
507 | ||
508 | assert(dest); | |
509 | ||
510 | p = prefix_roota(dest, "/sys/fs/cgroup"); | |
511 | ||
512 | (void) mkdir_p(p, 0755); | |
513 | ||
514 | r = path_is_mount_point(p, dest, AT_SYMLINK_FOLLOW); | |
515 | if (r < 0) | |
516 | return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p); | |
517 | if (r > 0) { | |
518 | p = prefix_roota(dest, "/sys/fs/cgroup/cgroup.procs"); | |
519 | if (access(p, F_OK) >= 0) | |
520 | return 0; | |
521 | if (errno != ENOENT) | |
522 | return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p); | |
523 | ||
baaa35ad ZJS |
524 | return log_error_errno(SYNTHETIC_ERRNO(EINVAL), |
525 | "%s is already mounted but not a unified cgroup hierarchy. Refusing.", p); | |
04029482 LS |
526 | } |
527 | ||
511a8cfe | 528 | return mount_nofollow_verbose(LOG_ERR, "cgroup", p, "cgroup2", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL); |
04029482 LS |
529 | } |
530 | ||
531 | int mount_cgroups( | |
532 | const char *dest, | |
533 | CGroupUnified unified_requested, | |
534 | bool userns, | |
535 | uid_t uid_shift, | |
536 | uid_t uid_range, | |
537 | const char *selinux_apifs_context, | |
538 | bool use_cgns) { | |
539 | ||
540 | if (unified_requested >= CGROUP_UNIFIED_ALL) | |
541 | return mount_unified_cgroups(dest); | |
542 | if (use_cgns) | |
543 | return mount_legacy_cgns_supported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context); | |
544 | ||
545 | return mount_legacy_cgns_unsupported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context); | |
546 | } | |
547 | ||
548 | static int mount_systemd_cgroup_writable_one(const char *root, const char *own) { | |
549 | int r; | |
550 | ||
551 | assert(root); | |
552 | assert(own); | |
553 | ||
554 | /* Make our own cgroup a (writable) bind mount */ | |
511a8cfe | 555 | r = mount_nofollow_verbose(LOG_ERR, own, own, NULL, MS_BIND, NULL); |
04029482 LS |
556 | if (r < 0) |
557 | return r; | |
558 | ||
559 | /* And then remount the systemd cgroup root read-only */ | |
511a8cfe LP |
560 | return mount_nofollow_verbose(LOG_ERR, NULL, root, NULL, |
561 | MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL); | |
04029482 LS |
562 | } |
563 | ||
564 | int mount_systemd_cgroup_writable( | |
565 | const char *dest, | |
566 | CGroupUnified unified_requested) { | |
567 | ||
568 | _cleanup_free_ char *own_cgroup_path = NULL; | |
569 | const char *root, *own; | |
570 | int r; | |
571 | ||
572 | assert(dest); | |
573 | ||
574 | r = cg_pid_get_path(NULL, 0, &own_cgroup_path); | |
575 | if (r < 0) | |
576 | return log_error_errno(r, "Failed to determine our own cgroup path: %m"); | |
577 | ||
578 | /* If we are living in the top-level, then there's nothing to do... */ | |
579 | if (path_equal(own_cgroup_path, "/")) | |
580 | return 0; | |
581 | ||
582 | if (unified_requested >= CGROUP_UNIFIED_ALL) { | |
583 | ||
584 | root = prefix_roota(dest, "/sys/fs/cgroup"); | |
585 | own = strjoina(root, own_cgroup_path); | |
586 | ||
587 | } else { | |
588 | ||
589 | if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { | |
590 | root = prefix_roota(dest, "/sys/fs/cgroup/unified"); | |
591 | own = strjoina(root, own_cgroup_path); | |
592 | ||
593 | r = mount_systemd_cgroup_writable_one(root, own); | |
594 | if (r < 0) | |
595 | return r; | |
596 | } | |
597 | ||
598 | root = prefix_roota(dest, "/sys/fs/cgroup/systemd"); | |
599 | own = strjoina(root, own_cgroup_path); | |
600 | } | |
601 | ||
602 | return mount_systemd_cgroup_writable_one(root, own); | |
603 | } |