/* src/shared/cgroup-setup.c */
/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <unistd.h>

#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "errno-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "mkdir.h"
#include "parse-util.h"
#include "path-util.h"
#include "proc-cmdline.h"
#include "process-util.h"
#include "recurse-dir.h"
#include "stdio-util.h"
#include "string-util.h"
#include "user-util.h"
#include "virt.h"

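/* Checks /proc/cgroups whether any controller we care about is currently attached to a cgroup v1
 * hierarchy. Returns > 0 if so, 0 if not, and a negative errno-style value if /proc/cgroups cannot be
 * read or parsed. */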
static int cg_any_controller_used_for_v1(void) {
        _cleanup_free_ char *buf = NULL;
        _cleanup_strv_free_ char **lines = NULL;
        char **line;
        int r;

        r = read_full_virtual_file("/proc/cgroups", &buf, NULL);
        if (r < 0)
                return log_debug_errno(r, "Could not read /proc/cgroups, ignoring: %m");

        r = strv_split_newlines_full(&lines, buf, 0);
        if (r < 0)
                return r;

        /* The intention of this is to check if the fully unified cgroup tree setup is possible, meaning all
         * enabled kernel cgroup controllers are currently not in use by cgroup1. For reference:
         * https://systemd.io/CGROUP_DELEGATION/#three-different-tree-setups-
         *
         * Note that this is typically only useful to check inside a container where we don't know what
         * cgroup tree setup is in use by the host; if the host is using legacy or hybrid, we can't use
         * unified since some or all controllers would be missing. This is not the best way to detect this,
         * as whatever container manager created our container should have mounted /sys/fs/cgroup
         * appropriately, but in case that wasn't done, we try to detect if it's possible for us to use
         * unified cgroups. */
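        /* Each line of /proc/cgroups follows the format "#subsys_name hierarchy num_cgroups enabled";
         * for example "memory  0  1  1" describes a controller that is enabled in the kernel but not
         * attached to any v1 hierarchy. */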
        STRV_FOREACH(line, lines) {
                _cleanup_free_ char *name = NULL, *hierarchy_id = NULL, *num = NULL, *enabled = NULL;

                /* Skip header line */
                if (startswith(*line, "#"))
                        continue;

                const char *p = *line;
                r = extract_many_words(&p, NULL, 0, &name, &hierarchy_id, &num, &enabled, NULL);
                if (r < 0)
                        return log_debug_errno(r, "Error parsing /proc/cgroups line, ignoring: %m");
                else if (r < 4) {
                        log_debug("Invalid /proc/cgroups line, ignoring.");
                        continue;
                }

                /* Ignore disabled controllers. */
                if (streq(enabled, "0"))
                        continue;

                /* Ignore controllers we don't care about. */
                if (cgroup_controller_from_string(name) < 0)
                        continue;

                /* Since the unified cgroup doesn't use multiple hierarchies, if any controller has a
                 * non-zero hierarchy_id that means it's in use already in a legacy (or hybrid) cgroup v1
                 * hierarchy, and can't be used in a unified cgroup. */
                if (!streq(hierarchy_id, "0")) {
                        log_debug("Cgroup controller %s in use by legacy v1 hierarchy.", name);
                        return 1;
                }
        }

        return 0;
}

bool cg_is_unified_wanted(void) {
        static thread_local int wanted = -1;
        bool b;
        const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
        _cleanup_free_ char *c = NULL;
        int r;

        /* If we have a cached value, return that. */
        if (wanted >= 0)
                return wanted;

        /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
        r = cg_unified_cached(true);
        if (r >= 0)
                return (wanted = r >= CGROUP_UNIFIED_ALL);

        /* If we were explicitly passed systemd.unified_cgroup_hierarchy, respect that. */
        r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
        if (r > 0)
                return (wanted = b);

        /* If we passed cgroup_no_v1=all with no other instructions, it seems highly unlikely that we want to
         * use hybrid or legacy hierarchy. */
        r = proc_cmdline_get_key("cgroup_no_v1", 0, &c);
        if (r > 0 && streq_ptr(c, "all"))
                return (wanted = true);

        /* If any controller is in use as v1, don't use unified. */
        if (cg_any_controller_used_for_v1() > 0)
                return (wanted = false);

        return (wanted = is_default);
}

bool cg_is_legacy_wanted(void) {
        static thread_local int wanted = -1;

        /* If we have a cached value, return that. */
        if (wanted >= 0)
                return wanted;

        /* Check if we have cgroup v2 already mounted. */
        if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
                return (wanted = false);

        /* Otherwise, assume that at least partial legacy is wanted,
         * since cgroup v2 should already be mounted at this point. */
        return (wanted = true);
}

bool cg_is_hybrid_wanted(void) {
        static thread_local int wanted = -1;
        int r;
        bool b;
        const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
        /* We default to true if the default is "hybrid", obviously, but also when the default is "unified",
         * because if we get called, it means that unified hierarchy was not mounted. */

        /* If we have a cached value, return that. */
        if (wanted >= 0)
                return wanted;

        /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
        if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
                return (wanted = false);

        /* Otherwise, let's see what the kernel command line has to say. Since checking is expensive, cache
         * a non-error result. */
        r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);

        /* The meaning of the kernel option is reversed with respect to the return value of this function,
         * hence the negation. */
        return (wanted = r > 0 ? !b : is_default);
}

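/* Parses a cgroup v2 weight value (as used e.g. by the "cpu.weight" and "io.weight" attributes). An
 * empty string selects CGROUP_WEIGHT_INVALID, i.e. "no value set"; otherwise the value must lie in
 * [CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX]. Returns 0 on success, negative on error. */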
int cg_weight_parse(const char *s, uint64_t *ret) {
        uint64_t u;
        int r;

        if (isempty(s)) {
                *ret = CGROUP_WEIGHT_INVALID;
                return 0;
        }

        r = safe_atou64(s, &u);
        if (r < 0)
                return r;

        if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
                return -ERANGE;

        *ret = u;
        return 0;
}

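/* Parses a legacy cgroup v1 CPU shares value, following the same conventions as cg_weight_parse()
 * above, but with the CGROUP_CPU_SHARES_MIN/MAX range. */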
int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
        uint64_t u;
        int r;

        if (isempty(s)) {
                *ret = CGROUP_CPU_SHARES_INVALID;
                return 0;
        }

        r = safe_atou64(s, &u);
        if (r < 0)
                return r;

        if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
                return -ERANGE;

        *ret = u;
        return 0;
}

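/* Parses a legacy cgroup v1 block IO weight value, again following the conventions of
 * cg_weight_parse(), with the CGROUP_BLKIO_WEIGHT_MIN/MAX range. */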
int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
        uint64_t u;
        int r;

        if (isempty(s)) {
                *ret = CGROUP_BLKIO_WEIGHT_INVALID;
                return 0;
        }

        r = safe_atou64(s, &u);
        if (r < 0)
                return r;

        if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
                return -ERANGE;

        *ret = u;
        return 0;
}

static int trim_cb(
                RecurseDirEvent event,
                const char *path,
                int dir_fd,
                int inode_fd,
                const struct dirent *de,
                const struct statx *sx,
                void *userdata) {

        /* We ignore failures to delete inner cgroups (but debug-log them in case the error code is
         * unexpected) */
        if (event == RECURSE_DIR_LEAVE &&
            de->d_type == DT_DIR &&
            unlinkat(dir_fd, de->d_name, AT_REMOVEDIR) < 0 &&
            !IN_SET(errno, ENOENT, ENOTEMPTY, EBUSY))
                log_debug_errno(errno, "Failed to trim inner cgroup %s, ignoring: %m", path);

        return RECURSE_DIR_CONTINUE;
}

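/* Recursively removes empty descendant cgroups below the specified path. If delete_root is true (and
 * the path is not the root cgroup), the top-level cgroup is removed as well. For the systemd controller
 * on a hybrid setup, the trimming is mirrored onto the legacy compat hierarchy. */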
int cg_trim(const char *controller, const char *path, bool delete_root) {
        _cleanup_free_ char *fs = NULL;
        int r, q;

        assert(path);
        assert(controller);

        r = cg_get_path(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        r = recurse_dir_at(
                        AT_FDCWD,
                        fs,
                        /* statx_mask= */ 0,
                        /* n_depth_max= */ UINT_MAX,
                        RECURSE_DIR_ENSURE_TYPE,
                        trim_cb,
                        NULL);
        if (r == -ENOENT) /* non-existing is the ultimate trimming, hence no error */
                r = 0;
        else if (r < 0)
                log_debug_errno(r, "Failed to iterate through cgroup %s: %m", path);
        /* If we shall delete the top-level cgroup, then propagate the failure to do so (except if it is
         * already gone anyway). Also, let's debug log about this failure, except if the error code is an
         * expected one. */
        if (delete_root && !empty_or_root(path) &&
            rmdir(fs) < 0 && errno != ENOENT) {
                if (!IN_SET(errno, ENOTEMPTY, EBUSY))
                        log_debug_errno(errno, "Failed to trim cgroup %s: %m", path);
                if (r >= 0)
                        r = -errno;
        }

        q = cg_hybrid_unified();
        if (q < 0)
                return q;
        if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER))
                (void) cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);

        return r;
}

/* Create a cgroup in the hierarchy of the specified controller.
 * Returns 0 if the group already existed, 1 on success, negative otherwise. */
int cg_create(const char *controller, const char *path) {
        _cleanup_free_ char *fs = NULL;
        int r;

        r = cg_get_path_and_check(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        r = mkdir_parents(fs, 0755);
        if (r < 0)
                return r;

        r = RET_NERRNO(mkdir(fs, 0755));
        if (r == -EEXIST)
                return 0;
        if (r < 0)
                return r;

        r = cg_hybrid_unified();
        if (r < 0)
                return r;

        if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
                if (r < 0)
                        log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
        }

        return 1;
}

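/* Creates the cgroup (if it does not exist yet) and moves the specified process into it. Returns
 * cg_create()'s result, i.e. 0 if the group already existed, 1 if it was created, negative on error. */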
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int r, q;

        assert(pid >= 0);

        r = cg_create(controller, path);
        if (r < 0)
                return r;

        q = cg_attach(controller, path, pid);
        if (q < 0)
                return q;

        /* This does not remove the cgroup on failure */
        return r;
}

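/* Moves the process with the specified PID into the given cgroup, by writing the PID to the cgroup's
 * "cgroup.procs" attribute. A PID of 0 refers to the calling process. */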
int cg_attach(const char *controller, const char *path, pid_t pid) {
        _cleanup_free_ char *fs = NULL;
        char c[DECIMAL_STR_MAX(pid_t) + 2];
        int r;

        assert(path);
        assert(pid >= 0);

        r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
        if (r < 0)
                return r;

        if (pid == 0)
                pid = getpid_cached();

        xsprintf(c, PID_FMT "\n", pid);

        r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
        if (r == -EOPNOTSUPP && cg_is_threaded(controller, path) > 0)
                /* When threaded mode is used, we cannot read/write the file. Let's return a recognizable
                 * error in that case. */
                return -EUCLEAN;
        if (r < 0)
                return r;

        r = cg_hybrid_unified();
        if (r < 0)
                return r;

        if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
                if (r < 0)
                        log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
        }

        return 0;
}

int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
        int r;

        assert(controller);
        assert(path);
        assert(pid >= 0);

        r = cg_attach(controller, path, pid);
        if (r < 0) {
                char prefix[strlen(path) + 1];

                /* This didn't work? Then let's try all prefixes of
                 * the destination */

                PATH_FOREACH_PREFIX(prefix, path) {
                        int q;

                        q = cg_attach(controller, prefix, pid);
                        if (q >= 0)
                                return q;
                }
        }

        return r;
}

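/* Changes ownership and mode of the cgroup directory and of the attribute files that a delegate needs
 * to be able to write, so that the specified user/group can manage the subtree. Which attribute files
 * are adjusted depends on whether the controller is managed via cgroup v1 or v2. */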
int cg_set_access(
                const char *controller,
                const char *path,
                uid_t uid,
                gid_t gid) {

        struct Attribute {
                const char *name;
                bool fatal;
        };

        /* cgroup v1, aka legacy/non-unified */
        static const struct Attribute legacy_attributes[] = {
                { "cgroup.procs",           true  },
                { "tasks",                  false },
                { "cgroup.clone_children",  false },
                {},
        };

        /* cgroup v2, aka unified */
        static const struct Attribute unified_attributes[] = {
                { "cgroup.procs",           true  },
                { "cgroup.subtree_control", true  },
                { "cgroup.threads",         false },
                {},
        };

        static const struct Attribute* const attributes[] = {
                [false] = legacy_attributes,
                [true]  = unified_attributes,
        };

        _cleanup_free_ char *fs = NULL;
        const struct Attribute *i;
        int r, unified;

        assert(path);

        if (uid == UID_INVALID && gid == GID_INVALID)
                return 0;

        unified = cg_unified_controller(controller);
        if (unified < 0)
                return unified;

        /* Configure access to the cgroup itself */
        r = cg_get_path(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        r = chmod_and_chown(fs, 0755, uid, gid);
        if (r < 0)
                return r;

        /* Configure access to the cgroup's attributes */
        for (i = attributes[unified]; i->name; i++) {
                fs = mfree(fs);

                r = cg_get_path(controller, path, i->name, &fs);
                if (r < 0)
                        return r;

                r = chmod_and_chown(fs, 0644, uid, gid);
                if (r < 0) {
                        if (i->fatal)
                                return r;

                        log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
                }
        }

        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                r = cg_hybrid_unified();
                if (r < 0)
                        return r;
                if (r > 0) {
                        /* Always propagate access mode from unified to legacy controller */
                        r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
                        if (r < 0)
                                log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
                }
        }

        return 0;
}

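/* Moves all processes from the cgroup pfrom of hierarchy cfrom into the cgroup pto of hierarchy cto.
 * The enumeration is restarted until a pass encounters no process we haven't seen yet, since new
 * processes may be forked into the source cgroup while we migrate. Returns > 0 if at least one process
 * was moved, 0 if none. */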
int cg_migrate(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        bool done = false;
        _cleanup_set_free_ Set *s = NULL;
        int r, ret = 0;
        pid_t my_pid;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        s = set_new(NULL);
        if (!s)
                return -ENOMEM;

        my_pid = getpid_cached();

        do {
                _cleanup_fclose_ FILE *f = NULL;
                pid_t pid = 0;
                done = true;

                r = cg_enumerate_processes(cfrom, pfrom, &f);
                if (r < 0) {
                        if (ret >= 0 && r != -ENOENT)
                                return r;

                        return ret;
                }

                while ((r = cg_read_pid(f, &pid)) > 0) {

                        /* This might do weird stuff if we aren't a
                         * single-threaded program. However, we
                         * luckily know we are not */
                        if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
                                continue;

                        if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
                                continue;

                        /* Ignore kernel threads. Since they can only
                         * exist in the root cgroup, we only check for
                         * them there. */
                        if (cfrom &&
                            empty_or_root(pfrom) &&
                            is_kernel_thread(pid) > 0)
                                continue;

                        r = cg_attach(cto, pto, pid);
                        if (r < 0) {
                                if (ret >= 0 && r != -ESRCH)
                                        ret = r;
                        } else if (ret == 0)
                                ret = 1;

                        done = false;

                        r = set_put(s, PID_TO_PTR(pid));
                        if (r < 0) {
                                if (ret >= 0)
                                        return r;

                                return ret;
                        }
                }

                if (r < 0) {
                        if (ret >= 0)
                                return r;

                        return ret;
                }
        } while (!done);

        return ret;
}

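/* Like cg_migrate(), but also descends into all subgroups of the source cgroup. With CGROUP_REMOVE set
 * in flags, the (emptied) source cgroups are removed afterwards. */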
int cg_migrate_recursive(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        _cleanup_closedir_ DIR *d = NULL;
        int r, ret = 0;
        char *fn;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        ret = cg_migrate(cfrom, pfrom, cto, pto, flags);

        r = cg_enumerate_subgroups(cfrom, pfrom, &d);
        if (r < 0) {
                if (ret >= 0 && r != -ENOENT)
                        return r;

                return ret;
        }

        while ((r = cg_read_subgroup(d, &fn)) > 0) {
                _cleanup_free_ char *p = NULL;

                p = path_join(empty_to_root(pfrom), fn);
                free(fn);
                if (!p)
                        return -ENOMEM;

                r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
                if (r != 0 && ret >= 0)
                        ret = r;
        }

        if (r < 0 && ret >= 0)
                ret = r;

        if (flags & CGROUP_REMOVE) {
                r = cg_rmdir(cfrom, pfrom);
                if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
                        return r;
        }

        return ret;
}

int cg_migrate_recursive_fallback(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        int r;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
        if (r < 0) {
                char prefix[strlen(pto) + 1];

                /* This didn't work? Then let's try all prefixes of the destination */

                PATH_FOREACH_PREFIX(prefix, pto) {
                        int q;

                        q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
                        if (q >= 0)
                                return q;
                }
        }

        return r;
}

int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
        CGroupController c;
        CGroupMask done;
        bool created;
        int r;

        /* This one will create a cgroup in our private tree, but also
         * duplicate it in the trees specified in mask, and remove it
         * in all others.
         *
         * Returns 0 if the group already existed in the systemd hierarchy,
         * 1 on success, negative otherwise.
         */

        /* First create the cgroup in our own hierarchy. */
        r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
        if (r < 0)
                return r;
        created = r;

        /* If we are in the unified hierarchy, we are done now */
        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r > 0)
                return created;

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        done = 0;

        /* Otherwise, do the same in the other hierarchies */
        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *n;

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                n = cgroup_controller_to_string(c);
                if (FLAGS_SET(mask, bit))
                        (void) cg_create(n, path);

                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return created;
}

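/* Attaches the specified process to the given cgroup in the systemd hierarchy and, on a legacy/hybrid
 * setup, to the matching cgroup in each supported v1 controller hierarchy as well. The optional
 * path_callback may return a different path to use for individual controllers. */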
int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
        int r;

        r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
        if (r < 0)
                return r;

        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r > 0)
                return 0;

        supported &= CGROUP_MASK_V1;
        CGroupMask done = 0;

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *p = NULL;

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                if (path_callback)
                        p = path_callback(bit, userdata);
                if (!p)
                        p = path;

                (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return 0;
}

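/* Migrates processes from the given cgroup of the systemd hierarchy into the v1 controller hierarchies
 * selected by mask, asking to_callback for the destination path to use for each controller. The first
 * error is remembered, but migration is attempted for all selected controllers. */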
int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata) {
        CGroupController c;
        CGroupMask done;
        int r = 0, q;

        assert(to_callback);

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        done = 0;

        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *to = NULL;

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                if (!FLAGS_SET(mask, bit))
                        continue;

                to = to_callback(bit, userdata);

                /* Remember first error and try continuing */
                q = cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, from, cgroup_controller_to_string(c), to, 0);
                r = (r < 0) ? r : q;
        }

        return r;
}

int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
        int r, q;

        r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
        if (r < 0)
                return r;

        q = cg_all_unified();
        if (q < 0)
                return q;
        if (q > 0)
                return r;

        return cg_trim_v1_controllers(supported, _CGROUP_MASK_ALL, path, delete_root);
}

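/* Trims the given cgroup in each of the v1 controller hierarchies selected by mask, remembering the
 * first error but continuing with the remaining controllers. */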
int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root) {
        CGroupController c;
        CGroupMask done;
        int r = 0, q;

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        done = 0;

        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                if (FLAGS_SET(mask, bit)) {
                        /* Remember first error and try continuing */
                        q = cg_trim(cgroup_controller_to_string(c), path, delete_root);
                        r = (r < 0) ? r : q;
                }
                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return r;
}

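/* Enables exactly the v2 controllers selected by mask (within the set of supported ones) for the
 * children of the cgroup p, by writing "+<controller>"/"-<controller>" tokens to its
 * "cgroup.subtree_control" attribute. If ret_result_mask is non-NULL, it is set to the controllers
 * that are actually enabled afterwards. */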
int cg_enable_everywhere(
                CGroupMask supported,
                CGroupMask mask,
                const char *p,
                CGroupMask *ret_result_mask) {

        _cleanup_fclose_ FILE *f = NULL;
        _cleanup_free_ char *fs = NULL;
        CGroupController c;
        CGroupMask ret = 0;
        int r;

        assert(p);

        if (supported == 0) {
                if (ret_result_mask)
                        *ret_result_mask = 0;
                return 0;
        }

        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r == 0) {
                /* On the legacy hierarchy there's no concept of "enabling" controllers for a cgroup. Let's
                 * claim complete success right away. (If you wonder why we return the full mask here, rather
                 * than zero: the caller tends to use the returned mask later on to compare whether all
                 * controllers were properly joined, and if not requeues realization. This use is the primary
                 * purpose of the return value, hence let's minimize surprises here and reduce triggers for
                 * re-realization by always saying we fully succeeded.) */
                if (ret_result_mask)
                        *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
                                                                               * CGROUP_MASK_V2: The 'supported' mask
                                                                               * might contain pure-V1 or BPF
                                                                               * controllers, and we never want to
                                                                               * claim that we could enable those with
                                                                               * cgroup.subtree_control */
                return 0;
        }

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
        if (r < 0)
                return r;

        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *n;

                if (!FLAGS_SET(CGROUP_MASK_V2, bit))
                        continue;

                if (!FLAGS_SET(supported, bit))
                        continue;

                n = cgroup_controller_to_string(c);
                {
                        char s[1 + strlen(n) + 1];

                        s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
                        strcpy(s + 1, n);

                        if (!f) {
                                f = fopen(fs, "we");
                                if (!f)
                                        return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
                        }

                        r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
                        if (r < 0) {
                                log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
                                                FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
                                clearerr(f);

                                /* If we can't turn off a controller, leave it on in the reported resulting mask. This
                                 * happens for example when we attempt to turn off a controller up in the tree that is
                                 * used down in the tree. */
                                if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
                                                                           * only here, and don't follow the same logic
                                                                           * for other errors such as EINVAL or
                                                                           * EOPNOTSUPP or anything else. That's
                                                                           * because EBUSY indicates that the
                                                                           * controller is currently enabled and
                                                                           * cannot be disabled because something down
                                                                           * the hierarchy is still using it. Any other
                                                                           * error most likely means something like "I
                                                                           * never heard of this controller" or
                                                                           * similar. In the former case it's hence
                                                                           * safe to assume the controller is still on
                                                                           * after the failed operation, while in the
                                                                           * latter case it's safer to assume the
                                                                           * controller is unknown and hence certainly
                                                                           * not enabled. */
                                        ret |= bit;
                        } else {
                                /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
                                if (FLAGS_SET(mask, bit))
                                        ret |= bit;
                        }
                }
        }

        /* Let's return the precise set of controllers now enabled for the cgroup. */
        if (ret_result_mask)
                *ret_result_mask = ret;

        return 0;
}