/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <unistd.h>

#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "errno-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "missing_threads.h"
#include "mkdir.h"
#include "parse-util.h"
#include "path-util.h"
#include "proc-cmdline.h"
#include "process-util.h"
#include "recurse-dir.h"
#include "stdio-util.h"
#include "string-util.h"
#include "user-util.h"
#include "virt.h"

static int cg_any_controller_used_for_v1(void) {
        _cleanup_free_ char *buf = NULL;
        _cleanup_strv_free_ char **lines = NULL;
        int r;

        r = read_full_virtual_file("/proc/cgroups", &buf, NULL);
        if (r < 0)
                return log_debug_errno(r, "Could not read /proc/cgroups, ignoring: %m");

        r = strv_split_newlines_full(&lines, buf, 0);
        if (r < 0)
                return r;

        /* The intention of this is to check if the fully unified cgroup tree setup is possible, meaning all
         * enabled kernel cgroup controllers are currently not in use by cgroup1. For reference:
         * https://systemd.io/CGROUP_DELEGATION/#three-different-tree-setups-
         *
         * Note that this is typically only useful to check inside a container where we don't know what
         * cgroup tree setup is in use by the host; if the host is using legacy or hybrid, we can't use
         * unified since some or all controllers would be missing. This is not the best way to detect this,
         * as whatever container manager created our container should have mounted /sys/fs/cgroup
         * appropriately, but in case that wasn't done, we try to detect if it's possible for us to use
         * unified cgroups. */
        STRV_FOREACH(line, lines) {
                _cleanup_free_ char *name = NULL, *hierarchy_id = NULL, *num = NULL, *enabled = NULL;

                /* Skip header line */
                if (startswith(*line, "#"))
                        continue;

                const char *p = *line;
                r = extract_many_words(&p, NULL, 0, &name, &hierarchy_id, &num, &enabled, NULL);
                if (r < 0)
                        return log_debug_errno(r, "Error parsing /proc/cgroups line, ignoring: %m");
                else if (r < 4) {
                        log_debug("Invalid /proc/cgroups line, ignoring.");
                        continue;
                }

                /* Ignore disabled controllers. */
                if (streq(enabled, "0"))
                        continue;

                /* Ignore controllers we don't care about. */
                if (cgroup_controller_from_string(name) < 0)
                        continue;

                /* Since the unified cgroup doesn't use multiple hierarchies, if any controller has a
                 * non-zero hierarchy_id that means it's in use already in a legacy (or hybrid) cgroup v1
                 * hierarchy, and can't be used in a unified cgroup. */
                if (!streq(hierarchy_id, "0")) {
                        log_debug("Cgroup controller %s in use by legacy v1 hierarchy.", name);
                        return 1;
                }
        }

        return 0;
}
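
/* For reference, a minimal sketch of the /proc/cgroups format parsed above (columns:
 * subsys_name, hierarchy, num_cgroups, enabled; the values shown are illustrative only):
 *
 *     #subsys_name    hierarchy       num_cgroups     enabled
 *     cpu             0               1               1
 *     memory          4               13              1
 *
 * Here "memory" has a non-zero hierarchy ID, i.e. it is bound to a v1 hierarchy, so the
 * function above would return 1 and a fully unified setup would be ruled out. */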

bool cg_is_unified_wanted(void) {
        static thread_local int wanted = -1;
        bool b;
        const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
        _cleanup_free_ char *c = NULL;
        int r;

        /* If we have a cached value, return that. */
        if (wanted >= 0)
                return wanted;

        /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
        r = cg_unified_cached(true);
        if (r >= 0)
                return (wanted = r >= CGROUP_UNIFIED_ALL);

        /* If we were explicitly passed systemd.unified_cgroup_hierarchy, respect that. */
        r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
        if (r > 0)
                return (wanted = b);

        /* If we passed cgroup_no_v1=all with no other instructions, it seems highly unlikely that we want to
         * use hybrid or legacy hierarchy. */
        r = proc_cmdline_get_key("cgroup_no_v1", 0, &c);
        if (r > 0 && streq_ptr(c, "all"))
                return (wanted = true);

        /* If any controller is in use as v1, don't use unified. */
        if (cg_any_controller_used_for_v1() > 0)
                return (wanted = false);

        return (wanted = is_default);
}

bool cg_is_legacy_wanted(void) {
        static thread_local int wanted = -1;

        /* If we have a cached value, return that. */
        if (wanted >= 0)
                return wanted;

        /* Check if we have cgroup v2 already mounted. */
        if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
                return (wanted = false);

        /* Otherwise, assume that at least partial legacy is wanted,
         * since cgroup v2 should already be mounted at this point. */
        return (wanted = true);
}

bool cg_is_hybrid_wanted(void) {
        static thread_local int wanted = -1;
        int r;
        bool b;
        const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
        /* We default to true if the default is "hybrid", obviously, but also when the default is "unified",
         * because if we get called, it means that unified hierarchy was not mounted. */

        /* If we have a cached value, return that. */
        if (wanted >= 0)
                return wanted;

        /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
        if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
                return (wanted = false);

        /* Otherwise, let's see what the kernel command line has to say. Since checking is expensive, cache
         * a non-error result. */
        r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);

        /* The meaning of the kernel option is reversed wrt. the return value of this function, hence the
         * negation. */
        return (wanted = r > 0 ? !b : is_default);
}
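
/* A rough summary of the kernel command line switches consulted above (illustrative, not
 * exhaustive; an already-mounted hierarchy always takes precedence):
 *
 *     systemd.unified_cgroup_hierarchy=1           → full unified (cgroup v2)
 *     cgroup_no_v1=all                             → full unified (cgroup v2)
 *     systemd.legacy_systemd_cgroup_controller=1   → full legacy (cgroup v1), i.e. no hybrid
 */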

int cg_weight_parse(const char *s, uint64_t *ret) {
        uint64_t u;
        int r;

        if (isempty(s)) {
                *ret = CGROUP_WEIGHT_INVALID;
                return 0;
        }

        r = safe_atou64(s, &u);
        if (r < 0)
                return r;

        if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
                return -ERANGE;

        *ret = u;
        return 0;
}

int cg_cpu_weight_parse(const char *s, uint64_t *ret) {
        if (streq_ptr(s, "idle"))
                return *ret = CGROUP_WEIGHT_IDLE;
        return cg_weight_parse(s, ret);
}
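
/* A minimal usage sketch for the parsers above (hypothetical caller, not part of this
 * file). Note that CGROUP_WEIGHT_IDLE lies outside the regular CGROUP_WEIGHT_MIN…MAX
 * range, hence the special-casing of "idle":
 *
 *     uint64_t w;
 *     assert_se(cg_cpu_weight_parse("idle", &w) == 0);      // w == CGROUP_WEIGHT_IDLE
 *     assert_se(cg_cpu_weight_parse("100", &w) == 0);       // w == 100
 *     assert_se(cg_cpu_weight_parse("0", &w) == -ERANGE);   // below CGROUP_WEIGHT_MIN
 */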

int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
        uint64_t u;
        int r;

        if (isempty(s)) {
                *ret = CGROUP_CPU_SHARES_INVALID;
                return 0;
        }

        r = safe_atou64(s, &u);
        if (r < 0)
                return r;

        if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
                return -ERANGE;

        *ret = u;
        return 0;
}

int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
        uint64_t u;
        int r;

        if (isempty(s)) {
                *ret = CGROUP_BLKIO_WEIGHT_INVALID;
                return 0;
        }

        r = safe_atou64(s, &u);
        if (r < 0)
                return r;

        if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
                return -ERANGE;

        *ret = u;
        return 0;
}

static int trim_cb(
                RecurseDirEvent event,
                const char *path,
                int dir_fd,
                int inode_fd,
                const struct dirent *de,
                const struct statx *sx,
                void *userdata) {

        /* We ignore failures to delete inner cgroups (but debug-log them in case the error code is unexpected) */
        if (event == RECURSE_DIR_LEAVE &&
            de->d_type == DT_DIR &&
            unlinkat(dir_fd, de->d_name, AT_REMOVEDIR) < 0 &&
            !IN_SET(errno, ENOENT, ENOTEMPTY, EBUSY))
                log_debug_errno(errno, "Failed to trim inner cgroup %s, ignoring: %m", path);

        return RECURSE_DIR_CONTINUE;
}

int cg_trim(const char *controller, const char *path, bool delete_root) {
        _cleanup_free_ char *fs = NULL;
        int r, q;

        assert(path);
        assert(controller);

        r = cg_get_path(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        r = recurse_dir_at(
                        AT_FDCWD,
                        fs,
                        /* statx_mask= */ 0,
                        /* n_depth_max= */ UINT_MAX,
                        RECURSE_DIR_ENSURE_TYPE,
                        trim_cb,
                        NULL);
        if (r == -ENOENT) /* non-existing is the ultimate trimming, hence no error */
                r = 0;
        else if (r < 0)
                log_debug_errno(r, "Failed to iterate through cgroup %s: %m", path);

        /* If we shall delete the top-level cgroup, then propagate the failure to do so (except if it is
         * already gone anyway). Also, let's debug log about this failure, except if the error code is an
         * expected one. */
        if (delete_root && !empty_or_root(path) &&
            rmdir(fs) < 0 && errno != ENOENT) {
                if (!IN_SET(errno, ENOTEMPTY, EBUSY))
                        log_debug_errno(errno, "Failed to trim cgroup %s: %m", path);
                if (r >= 0)
                        r = -errno;
        }

        q = cg_hybrid_unified();
        if (q < 0)
                return q;
        if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER))
                (void) cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);

        return r;
}
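
/* Usage sketch (hypothetical): drop everything below a unit's cgroup but keep the cgroup
 * itself, e.g. after all children have exited:
 *
 *     r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, "/system.slice/foo.service", false);
 *
 * With delete_root=true the top-level directory is removed as well, and a failure to do
 * so (other than ENOENT) is propagated to the caller. */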

/* Create a cgroup in the hierarchy of controller.
 * Returns 0 if the group already existed, 1 on success, negative otherwise.
 */
int cg_create(const char *controller, const char *path) {
        _cleanup_free_ char *fs = NULL;
        int r;

        r = cg_get_path_and_check(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        r = mkdir_parents(fs, 0755);
        if (r < 0)
                return r;

        r = RET_NERRNO(mkdir(fs, 0755));
        if (r == -EEXIST)
                return 0;
        if (r < 0)
                return r;

        r = cg_hybrid_unified();
        if (r < 0)
                return r;

        if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
                if (r < 0)
                        log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
        }

        return 1;
}

int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int r, q;

        assert(pid >= 0);

        r = cg_create(controller, path);
        if (r < 0)
                return r;

        q = cg_attach(controller, path, pid);
        if (q < 0)
                return q;

        /* This does not remove the cgroup on failure */
        return r;
}
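
/* Usage sketch (hypothetical): create a cgroup and move the invoking process into it in
 * one step; pid 0 is shorthand for the calling process (see cg_attach() below):
 *
 *     r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, "/system.slice/foo.service", 0);
 *     if (r < 0)
 *             log_error_errno(r, "Failed to set up cgroup: %m");
 */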

int cg_attach(const char *controller, const char *path, pid_t pid) {
        _cleanup_free_ char *fs = NULL;
        char c[DECIMAL_STR_MAX(pid_t) + 2];
        int r;

        assert(path);
        assert(pid >= 0);

        r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
        if (r < 0)
                return r;

        if (pid == 0)
                pid = getpid_cached();

        xsprintf(c, PID_FMT "\n", pid);

        r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
        if (r == -EOPNOTSUPP && cg_is_threaded(controller, path) > 0)
                /* When the threaded mode is used, we cannot read/write the file. Let's return a recognizable error. */
                return -EUCLEAN;
        if (r < 0)
                return r;

        r = cg_hybrid_unified();
        if (r < 0)
                return r;

        if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
                if (r < 0)
                        log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
        }

        return 0;
}

int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
        int r;

        assert(controller);
        assert(path);
        assert(pid >= 0);

        r = cg_attach(controller, path, pid);
        if (r < 0) {
                char prefix[strlen(path) + 1];

                /* This didn't work? Then let's try all prefixes of
                 * the destination */

                PATH_FOREACH_PREFIX(prefix, path) {
                        int q;

                        q = cg_attach(controller, prefix, pid);
                        if (q >= 0)
                                return q;
                }
        }

        return r;
}
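
/* To illustrate the prefix walk above: for path "/a/b/c", PATH_FOREACH_PREFIX yields
 * "/a/b", then "/a", then "" (which the cg_* calls treat as the root cgroup), so after
 * the exact path fails we attach to the nearest ancestor cgroup that works instead. */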

int cg_set_access(
                const char *controller,
                const char *path,
                uid_t uid,
                gid_t gid) {

        struct Attribute {
                const char *name;
                bool fatal;
        };

        /* cgroup v1, aka legacy/non-unified */
        static const struct Attribute legacy_attributes[] = {
                { "cgroup.procs",           true  },
                { "tasks",                  false },
                { "cgroup.clone_children",  false },
                {},
        };

        /* cgroup v2, aka unified */
        static const struct Attribute unified_attributes[] = {
                { "cgroup.procs",           true  },
                { "cgroup.subtree_control", true  },
                { "cgroup.threads",         false },
                {},
        };

        static const struct Attribute* const attributes[] = {
                [false] = legacy_attributes,
                [true]  = unified_attributes,
        };

        _cleanup_free_ char *fs = NULL;
        const struct Attribute *i;
        int r, unified;

        assert(path);

        if (uid == UID_INVALID && gid == GID_INVALID)
                return 0;

        unified = cg_unified_controller(controller);
        if (unified < 0)
                return unified;

        /* Configure access to the cgroup itself */
        r = cg_get_path(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        r = chmod_and_chown(fs, 0755, uid, gid);
        if (r < 0)
                return r;

        /* Configure access to the cgroup's attributes */
        for (i = attributes[unified]; i->name; i++) {
                fs = mfree(fs);

                r = cg_get_path(controller, path, i->name, &fs);
                if (r < 0)
                        return r;

                r = chmod_and_chown(fs, 0644, uid, gid);
                if (r < 0) {
                        if (i->fatal)
                                return r;

                        log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
                }
        }

        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                r = cg_hybrid_unified();
                if (r < 0)
                        return r;
                if (r > 0) {
                        /* Always propagate access mode from unified to legacy controller */
                        r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
                        if (r < 0)
                                log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
                }
        }

        return 0;
}

struct access_callback_data {
        uid_t uid;
        gid_t gid;
        int error;
};

static int access_callback(
                RecurseDirEvent event,
                const char *path,
                int dir_fd,
                int inode_fd,
                const struct dirent *de,
                const struct statx *sx,
                void *userdata) {

        struct access_callback_data *d = ASSERT_PTR(userdata);

        if (!IN_SET(event, RECURSE_DIR_ENTER, RECURSE_DIR_ENTRY))
                return RECURSE_DIR_CONTINUE;

        assert(inode_fd >= 0);

        /* fchown() doesn't support O_PATH fds, hence we use the /proc/self/fd/ trick */
        if (chown(FORMAT_PROC_FD_PATH(inode_fd), d->uid, d->gid) < 0) {
                log_debug_errno(errno, "Failed to change ownership of '%s', ignoring: %m", ASSERT_PTR(path));

                if (d->error == 0) /* Return first error to caller */
                        d->error = errno;
        }

        return RECURSE_DIR_CONTINUE;
}
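
/* A sketch of the /proc/self/fd/ trick used above: chown() on the magic symlink follows
 * it to the O_PATH fd's inode, which plain fchown() refuses to touch. Roughly what
 * FORMAT_PROC_FD_PATH() expands to (the path is hypothetical):
 *
 *     int fd = open("/sys/fs/cgroup/foo", O_PATH|O_CLOEXEC);
 *     char p[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
 *     xsprintf(p, "/proc/self/fd/%i", fd);
 *     (void) chown(p, uid, gid);
 */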

int cg_set_access_recursive(
                const char *controller,
                const char *path,
                uid_t uid,
                gid_t gid) {

        _cleanup_close_ int fd = -EBADF;
        _cleanup_free_ char *fs = NULL;
        int r;

        /* A recursive version of cg_set_access(). But note that this one changes ownership of *all* files,
         * not just the allowlist that cg_set_access() uses. Use cg_set_access() on the cgroup you want to
         * delegate, and cg_set_access_recursive() for any subcgroups you might want to create below it. */

        if (!uid_is_valid(uid) && !gid_is_valid(gid))
                return 0;

        r = cg_get_path(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        fd = open(fs, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
        if (fd < 0)
                return -errno;

        struct access_callback_data d = {
                .uid = uid,
                .gid = gid,
        };

        r = recurse_dir(fd,
                        fs,
                        /* statx_mask= */ 0,
                        /* n_depth_max= */ UINT_MAX,
                        RECURSE_DIR_SAME_MOUNT|RECURSE_DIR_INODE_FD|RECURSE_DIR_TOPLEVEL,
                        access_callback,
                        &d);
        if (r < 0)
                return r;

        return -d.error;
}
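
/* Usage sketch (hypothetical): delegate a subtree to an unprivileged user. Per the note
 * above, use cg_set_access() on the delegation boundary itself and the recursive variant
 * only below it:
 *
 *     (void) cg_set_access(SYSTEMD_CGROUP_CONTROLLER, "/foo", uid, gid);
 *     (void) cg_set_access_recursive(SYSTEMD_CGROUP_CONTROLLER, "/foo/payload", uid, gid);
 */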

int cg_migrate(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        bool done = false;
        _cleanup_set_free_ Set *s = NULL;
        int r, ret = 0;
        pid_t my_pid;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        s = set_new(NULL);
        if (!s)
                return -ENOMEM;

        my_pid = getpid_cached();

        do {
                _cleanup_fclose_ FILE *f = NULL;
                pid_t pid = 0;
                done = true;

                r = cg_enumerate_processes(cfrom, pfrom, &f);
                if (r < 0) {
                        if (ret >= 0 && r != -ENOENT)
                                return r;

                        return ret;
                }

                while ((r = cg_read_pid(f, &pid)) > 0) {

                        /* This might do weird stuff if we aren't a
                         * single-threaded program. However, we
                         * luckily know we are. */
                        if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
                                continue;

                        if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
                                continue;

                        /* Ignore kernel threads. Since they can only
                         * exist in the root cgroup, we only check for
                         * them there. */
                        if (cfrom &&
                            empty_or_root(pfrom) &&
                            is_kernel_thread(pid) > 0)
                                continue;

                        r = cg_attach(cto, pto, pid);
                        if (r < 0) {
                                if (ret >= 0 && r != -ESRCH)
                                        ret = r;
                        } else if (ret == 0)
                                ret = 1;

                        done = false;

                        r = set_put(s, PID_TO_PTR(pid));
                        if (r < 0) {
                                if (ret >= 0)
                                        return r;

                                return ret;
                        }
                }

                if (r < 0) {
                        if (ret >= 0)
                                return r;

                        return ret;
                }
        } while (!done);

        return ret;
}
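
/* Usage sketch (hypothetical): move every process of one cgroup into another, skipping
 * ourselves:
 *
 *     r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, "/from",
 *                    SYSTEMD_CGROUP_CONTROLLER, "/to", CGROUP_IGNORE_SELF);
 *
 * The do/while loop above re-enumerates until a full pass moves nobody, since a process
 * may fork while we iterate; the Set of already-seen PIDs keeps us from retrying
 * processes that cannot be moved. */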

int cg_migrate_recursive(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        _cleanup_closedir_ DIR *d = NULL;
        int r, ret = 0;
        char *fn;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        ret = cg_migrate(cfrom, pfrom, cto, pto, flags);

        r = cg_enumerate_subgroups(cfrom, pfrom, &d);
        if (r < 0) {
                if (ret >= 0 && r != -ENOENT)
                        return r;

                return ret;
        }

        while ((r = cg_read_subgroup(d, &fn)) > 0) {
                _cleanup_free_ char *p = NULL;

                p = path_join(empty_to_root(pfrom), fn);
                free(fn);
                if (!p)
                        return -ENOMEM;

                r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
                if (r != 0 && ret >= 0)
                        ret = r;
        }

        if (r < 0 && ret >= 0)
                ret = r;

        if (flags & CGROUP_REMOVE) {
                r = cg_rmdir(cfrom, pfrom);
                if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
                        return r;
        }

        return ret;
}

int cg_migrate_recursive_fallback(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        int r;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
        if (r < 0) {
                char prefix[strlen(pto) + 1];

                /* This didn't work? Then let's try all prefixes of the destination */

                PATH_FOREACH_PREFIX(prefix, pto) {
                        int q;

                        q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
                        if (q >= 0)
                                return q;
                }
        }

        return r;
}

int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
        CGroupController c;
        CGroupMask done;
        bool created;
        int r;

        /* This one will create a cgroup in our private tree, but also
         * duplicate it in the trees specified in mask, and remove it
         * in all others.
         *
         * Returns 0 if the group already existed in the systemd hierarchy,
         * 1 on success, negative otherwise.
         */

        /* First create the cgroup in our own hierarchy. */
        r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
        if (r < 0)
                return r;
        created = r;

        /* If we are in the unified hierarchy, we are done now */
        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r > 0)
                return created;

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        done = 0;

        /* Otherwise, do the same in the other hierarchies */
        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *n;

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                n = cgroup_controller_to_string(c);
                if (FLAGS_SET(mask, bit))
                        (void) cg_create(n, path);

                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return created;
}

int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
        int r;

        r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
        if (r < 0)
                return r;

        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r > 0)
                return 0;

        supported &= CGROUP_MASK_V1;
        CGroupMask done = 0;

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *p = NULL;

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                if (path_callback)
                        p = path_callback(bit, userdata);
                if (!p)
                        p = path;

                (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return 0;
}

int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata) {
        CGroupController c;
        CGroupMask done;
        int r = 0, q;

        assert(to_callback);

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        done = 0;

        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *to = NULL;

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                if (!FLAGS_SET(mask, bit))
                        continue;

                to = to_callback(bit, userdata);

                /* Remember first error and try continuing */
                q = cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, from, cgroup_controller_to_string(c), to, 0);
                r = (r < 0) ? r : q;

                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return r;
}

int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
        int r, q;

        r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
        if (r < 0)
                return r;

        q = cg_all_unified();
        if (q < 0)
                return q;
        if (q > 0)
                return r;

        return cg_trim_v1_controllers(supported, _CGROUP_MASK_ALL, path, delete_root);
}

int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root) {
        CGroupController c;
        CGroupMask done;
        int r = 0, q;

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        done = 0;

        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                if (FLAGS_SET(mask, bit)) {
                        /* Remember first error and try continuing */
                        q = cg_trim(cgroup_controller_to_string(c), path, delete_root);
                        r = (r < 0) ? r : q;
                }
                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return r;
}

int cg_enable_everywhere(
                CGroupMask supported,
                CGroupMask mask,
                const char *p,
                CGroupMask *ret_result_mask) {

        _cleanup_fclose_ FILE *f = NULL;
        _cleanup_free_ char *fs = NULL;
        CGroupController c;
        CGroupMask ret = 0;
        int r;

        assert(p);

        if (supported == 0) {
                if (ret_result_mask)
                        *ret_result_mask = 0;
                return 0;
        }

        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r == 0) {
                /* On the legacy hierarchy the concept of "enabling" controllers in cgroups is not defined. Let's
                 * claim complete success right away. (If you wonder why we return the full mask here, rather than
                 * zero: the caller tends to use the returned mask later on to compare if all controllers were
                 * properly joined, and if not requeues realization. This use is the primary purpose of the return
                 * value, hence let's minimize surprises here and reduce triggers for re-realization by always
                 * saying we fully succeeded.) */
                if (ret_result_mask)
                        *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
                                                                               * CGROUP_MASK_V2: The 'supported' mask
                                                                               * might contain pure-V1 or BPF
                                                                               * controllers, and we never want to
                                                                               * claim that we could enable those with
                                                                               * cgroup.subtree_control */
                return 0;
        }

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
        if (r < 0)
                return r;

        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *n;

                if (!FLAGS_SET(CGROUP_MASK_V2, bit))
                        continue;

                if (!FLAGS_SET(supported, bit))
                        continue;

                n = cgroup_controller_to_string(c);
                {
                        char s[1 + strlen(n) + 1];

                        s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
                        strcpy(s + 1, n);

                        if (!f) {
                                f = fopen(fs, "we");
                                if (!f)
                                        return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
                        }

                        r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
                        if (r < 0) {
                                log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
                                                FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
                                clearerr(f);

                                /* If we can't turn off a controller, leave it on in the reported resulting mask. This
                                 * happens for example when we attempt to turn off a controller up in the tree that is
                                 * used down in the tree. */
                                if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
                                                                           * only here, and not follow the same logic
                                                                           * for other errors such as EINVAL or
                                                                           * EOPNOTSUPP or anything else. That's
                                                                           * because EBUSY indicates that the
                                                                           * controller is currently enabled and
                                                                           * cannot be disabled because something down
                                                                           * the hierarchy is still using it. Any other
                                                                           * error most likely means something like "I
                                                                           * never heard of this controller" or
                                                                           * similar. In the former case it's hence
                                                                           * safe to assume the controller is still on
                                                                           * after the failed operation, while in the
                                                                           * latter case it's safer to assume the
                                                                           * controller is unknown and hence certainly
                                                                           * not enabled. */
                                        ret |= bit;
                        } else {
                                /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
                                if (FLAGS_SET(mask, bit))
                                        ret |= bit;
                        }
                }
        }

        /* Let's return the precise set of controllers now enabled for the cgroup. */
        if (ret_result_mask)
                *ret_result_mask = ret;

        return 0;
}
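
/* For reference, the writes performed above match what one would do by hand; one token
 * per write, so a failure can be attributed to a specific controller (illustrative shell
 * equivalent):
 *
 *     echo "+cpu" > /sys/fs/cgroup/foo/cgroup.subtree_control
 *     echo "-io"  > /sys/fs/cgroup/foo/cgroup.subtree_control
 */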