/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <unistd.h>

#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "errno-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "missing_threads.h"
#include "mkdir.h"
#include "parse-util.h"
#include "path-util.h"
#include "proc-cmdline.h"
#include "process-util.h"
#include "recurse-dir.h"
#include "stdio-util.h"
#include "string-util.h"
#include "user-util.h"
#include "virt.h"

static int cg_any_controller_used_for_v1(void) {
        _cleanup_free_ char *buf = NULL;
        _cleanup_strv_free_ char **lines = NULL;
        int r;

        r = read_full_virtual_file("/proc/cgroups", &buf, NULL);
        if (r < 0)
                return log_debug_errno(r, "Could not read /proc/cgroups, ignoring: %m");

        r = strv_split_newlines_full(&lines, buf, 0);
        if (r < 0)
                return r;

        /* The intention of this is to check if the fully unified cgroup tree setup is possible, meaning all
         * enabled kernel cgroup controllers are currently not in use by cgroup1. For reference:
         * https://systemd.io/CGROUP_DELEGATION/#three-different-tree-setups-
         *
         * Note that this is typically only useful to check inside a container where we don't know what
         * cgroup tree setup is in use by the host; if the host is using legacy or hybrid, we can't use
         * unified since some or all controllers would be missing. This is not the best way to detect this,
         * as whatever container manager created our container should have mounted /sys/fs/cgroup
         * appropriately, but in case that wasn't done, we try to detect if it's possible for us to use
         * unified cgroups. */
        STRV_FOREACH(line, lines) {
                _cleanup_free_ char *name = NULL, *hierarchy_id = NULL, *num = NULL, *enabled = NULL;

                /* Skip header line */
                if (startswith(*line, "#"))
                        continue;

                const char *p = *line;
                r = extract_many_words(&p, NULL, 0, &name, &hierarchy_id, &num, &enabled);
                if (r < 0)
                        return log_debug_errno(r, "Error parsing /proc/cgroups line, ignoring: %m");
                else if (r < 4) {
                        log_debug("Invalid /proc/cgroups line, ignoring.");
                        continue;
                }

                /* Ignore disabled controllers. */
                if (streq(enabled, "0"))
                        continue;

                /* Ignore controllers we don't care about. */
                if (cgroup_controller_from_string(name) < 0)
                        continue;

                /* Since the unified cgroup doesn't use multiple hierarchies, if any controller has a
                 * non-zero hierarchy_id that means it's in use already in a legacy (or hybrid) cgroup v1
                 * hierarchy, and can't be used in a unified cgroup. */
                if (!streq(hierarchy_id, "0")) {
                        log_debug("Cgroup controller %s in use by legacy v1 hierarchy.", name);
                        return 1;
                }
        }

        return 0;
}
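
/* Editor's note (illustrative, not upstream code): the parser above expects the usual four-column
 * /proc/cgroups layout, e.g.:
 *
 *     #subsys_name    hierarchy       num_cgroups     enabled
 *     cpu             0               1               1
 *     memory          3               42              1
 *
 * In this hypothetical example the "memory" line carries a non-zero hierarchy id, so the function
 * would return 1, i.e. at least one controller is still bound to a cgroup v1 hierarchy. */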

bool cg_is_unified_wanted(void) {
        static thread_local int wanted = -1;
        int r;

        /* If we have a cached value, return that. */
        if (wanted >= 0)
                return wanted;

        /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
        r = cg_unified_cached(true);
        if (r >= 0)
                return (wanted = r >= CGROUP_UNIFIED_ALL);

        /* If we were explicitly passed systemd.unified_cgroup_hierarchy, respect that. */
        bool b;
        r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", /* flags = */ 0, &b);
        if (r > 0)
                return (wanted = b);

        /* If we passed cgroup_no_v1=all with no other instructions, it seems highly unlikely that we want to
         * use hybrid or legacy hierarchy. */
        _cleanup_free_ char *c = NULL;
        r = proc_cmdline_get_key("cgroup_no_v1", 0, &c);
        if (r > 0 && streq_ptr(c, "all"))
                return (wanted = true);

        /* If any controller is in use as v1, don't use unified. */
        return (wanted = (cg_any_controller_used_for_v1() <= 0));
}

bool cg_is_legacy_wanted(void) {
        static thread_local int wanted = -1;

        /* If we have a cached value, return that. */
        if (wanted >= 0)
                return wanted;

        /* Check if we have cgroup v2 already mounted. */
        if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
                return (wanted = false);

        /* Otherwise, assume that at least partial legacy is wanted,
         * since cgroup v2 should already be mounted at this point. */
        return (wanted = true);
}

bool cg_is_hybrid_wanted(void) {
        static thread_local int wanted = -1;
        int r;

        /* If we have a cached value, return that. */
        if (wanted >= 0)
                return wanted;

        /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
        if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
                return (wanted = false);

        /* Otherwise, let's see what the kernel command line has to say. Since checking is expensive, cache
         * a non-error result.
         * The meaning of the kernel option is reversed with respect to the return value of this function,
         * hence the negation. */
        bool b;
        r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", /* flags = */ 0, &b);
        if (r > 0)
                return (wanted = !b);

        /* The default hierarchy is "unified". But if this is reached, it means that unified hierarchy was
         * not mounted, so return true too. */
        return (wanted = true);
}
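
/* Editor's sketch (hypothetical caller, not upstream code): the three predicates above are meant to be
 * consulted roughly like this when deciding which of the tree setups described in
 * https://systemd.io/CGROUP_DELEGATION/ to mount. */
#if 0
static const char* example_pick_tree_setup(void) {
        /* Note that cg_is_legacy_wanted() is true in both non-unified cases, since v1 controller
         * hierarchies get mounted for the hybrid setup too. */
        if (cg_is_unified_wanted())
                return "unified"; /* cgroup v2 everywhere */
        if (cg_is_hybrid_wanted())
                return "hybrid";  /* v1 controllers, v2 for systemd's own hierarchy */
        return "legacy";          /* cgroup v1 everywhere */
}
#endif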

bool cg_is_legacy_force_enabled(void) {
        bool force;

        if (!cg_is_legacy_wanted())
                return false;

        /* If in container, we have to follow host's cgroup hierarchy. */
        if (detect_container() > 0)
                return true;

        if (proc_cmdline_get_bool("SYSTEMD_CGROUP_ENABLE_LEGACY_FORCE", /* flags = */ 0, &force) < 0)
                return false;

        return force;
}

int cg_weight_parse(const char *s, uint64_t *ret) {
        uint64_t u;
        int r;

        if (isempty(s)) {
                *ret = CGROUP_WEIGHT_INVALID;
                return 0;
        }

        r = safe_atou64(s, &u);
        if (r < 0)
                return r;

        if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
                return -ERANGE;

        *ret = u;
        return 0;
}

int cg_cpu_weight_parse(const char *s, uint64_t *ret) {
        if (streq_ptr(s, "idle"))
                return *ret = CGROUP_WEIGHT_IDLE;
        return cg_weight_parse(s, ret);
}

int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
        uint64_t u;
        int r;

        if (isempty(s)) {
                *ret = CGROUP_CPU_SHARES_INVALID;
                return 0;
        }

        r = safe_atou64(s, &u);
        if (r < 0)
                return r;

        if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
                return -ERANGE;

        *ret = u;
        return 0;
}

int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
        uint64_t u;
        int r;

        if (isempty(s)) {
                *ret = CGROUP_BLKIO_WEIGHT_INVALID;
                return 0;
        }

        r = safe_atou64(s, &u);
        if (r < 0)
                return r;

        if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
                return -ERANGE;

        *ret = u;
        return 0;
}
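
/* Editor's sketch (hypothetical usage, not upstream code): how a caller might use the parsers above,
 * e.g. when reading a CPUWeight= style setting. */
#if 0
static int example_parse_cpu_weight(const char *value, uint64_t *ret_weight) {
        int r;

        /* Per the parsers above: "idle" maps to CGROUP_WEIGHT_IDLE, the empty string to
         * CGROUP_WEIGHT_INVALID, and anything else must lie in [CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX]. */
        r = cg_cpu_weight_parse(value, ret_weight);
        if (r < 0)
                return log_debug_errno(r, "Failed to parse CPU weight '%s': %m", value);

        return 0;
}
#endif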

static int trim_cb(
                RecurseDirEvent event,
                const char *path,
                int dir_fd,
                int inode_fd,
                const struct dirent *de,
                const struct statx *sx,
                void *userdata) {

        /* We ignore failures to delete inner cgroups (but debug-log them in case the error code is unexpected) */
        if (event == RECURSE_DIR_LEAVE &&
            de->d_type == DT_DIR &&
            unlinkat(dir_fd, de->d_name, AT_REMOVEDIR) < 0 &&
            !IN_SET(errno, ENOENT, ENOTEMPTY, EBUSY))
                log_debug_errno(errno, "Failed to trim inner cgroup %s, ignoring: %m", path);

        return RECURSE_DIR_CONTINUE;
}

int cg_trim(const char *controller, const char *path, bool delete_root) {
        _cleanup_free_ char *fs = NULL;
        int r, q;

        assert(path);
        assert(controller);

        r = cg_get_path(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        r = recurse_dir_at(
                        AT_FDCWD,
                        fs,
                        /* statx_mask= */ 0,
                        /* n_depth_max= */ UINT_MAX,
                        RECURSE_DIR_ENSURE_TYPE,
                        trim_cb,
                        NULL);
        if (r == -ENOENT) /* non-existing is the ultimate trimming, hence no error */
                r = 0;
        else if (r < 0)
                log_debug_errno(r, "Failed to iterate through cgroup %s: %m", path);

        /* If we shall delete the top-level cgroup, then propagate the failure to do so (except if it is
         * already gone anyway). Also, let's debug log about this failure, except if the error code is an
         * expected one. */
        if (delete_root && !empty_or_root(path) &&
            rmdir(fs) < 0 && errno != ENOENT) {
                if (!IN_SET(errno, ENOTEMPTY, EBUSY))
                        log_debug_errno(errno, "Failed to trim cgroup %s: %m", path);
                if (r >= 0)
                        r = -errno;
        }

        q = cg_hybrid_unified();
        if (q < 0)
                return q;
        if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER))
                (void) cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);

        return r;
}
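
/* Editor's note (illustrative, not upstream code): as a hypothetical example,
 * cg_trim(SYSTEMD_CGROUP_CONTROLLER, "/example.slice/example.service", true) removes the nested
 * cgroups underneath the named cgroup and the named cgroup itself, while delete_root=false leaves
 * the named cgroup in place. Cgroups that are still busy or non-empty stay around; ENOTEMPTY and
 * EBUSY are treated as expected errors above. */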

/* Create a cgroup in the hierarchy of controller.
 * Returns 0 if the group already existed, 1 on success, negative otherwise.
 */
int cg_create(const char *controller, const char *path) {
        _cleanup_free_ char *fs = NULL;
        int r;

        r = cg_get_path_and_check(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        r = mkdir_parents(fs, 0755);
        if (r < 0)
                return r;

        r = RET_NERRNO(mkdir(fs, 0755));
        if (r == -EEXIST)
                return 0;
        if (r < 0)
                return r;

        r = cg_hybrid_unified();
        if (r < 0)
                return r;

        if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
                if (r < 0)
                        log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
        }

        return 1;
}

int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int r, q;

        assert(pid >= 0);

        r = cg_create(controller, path);
        if (r < 0)
                return r;

        q = cg_attach(controller, path, pid);
        if (q < 0)
                return q;

        /* This does not remove the cgroup on failure */
        return r;
}
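
/* Editor's sketch (hypothetical usage, not upstream code): creating a cgroup in systemd's own
 * hierarchy and moving the calling process into it (pid 0 means "myself", see cg_attach() below).
 * The path is made up for illustration. */
#if 0
static int example_create_and_join(void) {
        int r;

        r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, "/example.slice/example.service", 0);
        if (r < 0)
                return log_warning_errno(r, "Failed to create and join example cgroup: %m");

        /* r is 1 if the cgroup was freshly created, 0 if it already existed. */
        return r;
}
#endif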

int cg_attach(const char *controller, const char *path, pid_t pid) {
        _cleanup_free_ char *fs = NULL;
        char c[DECIMAL_STR_MAX(pid_t) + 2];
        int r;

        assert(path);
        assert(pid >= 0);

        r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
        if (r < 0)
                return r;

        if (pid == 0)
                pid = getpid_cached();

        xsprintf(c, PID_FMT "\n", pid);

        r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
        if (r == -EOPNOTSUPP && cg_is_threaded(path) > 0)
                /* When the threaded mode is used, we cannot read/write the file. Let's return a recognizable error. */
                return -EUCLEAN;
        if (r < 0)
                return r;

        r = cg_hybrid_unified();
        if (r < 0)
                return r;

        if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
                if (r < 0)
                        log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
        }

        return 0;
}

int cg_fd_attach(int fd, pid_t pid) {
        char c[DECIMAL_STR_MAX(pid_t) + 2];

        assert(fd >= 0);
        assert(pid >= 0);

        if (pid == 0)
                pid = getpid_cached();

        xsprintf(c, PID_FMT "\n", pid);

        return write_string_file_at(fd, "cgroup.procs", c, WRITE_STRING_FILE_DISABLE_BUFFER);
}

int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
        int r;

        assert(controller);
        assert(path);
        assert(pid >= 0);

        r = cg_attach(controller, path, pid);
        if (r < 0) {
                char prefix[strlen(path) + 1];

                /* This didn't work? Then let's try all prefixes of
                 * the destination */

                PATH_FOREACH_PREFIX(prefix, path) {
                        int q;

                        q = cg_attach(controller, prefix, pid);
                        if (q >= 0)
                                return q;
                }
        }

        return r;
}

int cg_set_access(
                const char *controller,
                const char *path,
                uid_t uid,
                gid_t gid) {

        struct Attribute {
                const char *name;
                bool fatal;
        };

        /* cgroup v1, aka legacy/non-unified */
        static const struct Attribute legacy_attributes[] = {
                { "cgroup.procs", true },
                { "tasks", false },
                { "cgroup.clone_children", false },
                {},
        };

        /* cgroup v2, aka unified */
        static const struct Attribute unified_attributes[] = {
                { "cgroup.procs", true },
                { "cgroup.subtree_control", true },
                { "cgroup.threads", false },
                { "memory.oom.group", false },
                { "memory.reclaim", false },
                {},
        };

        static const struct Attribute* const attributes[] = {
                [false] = legacy_attributes,
                [true] = unified_attributes,
        };

        _cleanup_free_ char *fs = NULL;
        const struct Attribute *i;
        int r, unified;

        assert(path);

        if (uid == UID_INVALID && gid == GID_INVALID)
                return 0;

        unified = cg_unified_controller(controller);
        if (unified < 0)
                return unified;

        /* Configure access to the cgroup itself */
        r = cg_get_path(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        r = chmod_and_chown(fs, 0755, uid, gid);
        if (r < 0)
                return r;

        /* Configure access to the cgroup's attributes */
        for (i = attributes[unified]; i->name; i++) {
                fs = mfree(fs);

                r = cg_get_path(controller, path, i->name, &fs);
                if (r < 0)
                        return r;

                r = chmod_and_chown(fs, 0644, uid, gid);
                if (r < 0) {
                        if (i->fatal)
                                return r;

                        log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
                }
        }

        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                r = cg_hybrid_unified();
                if (r < 0)
                        return r;
                if (r > 0) {
                        /* Always propagate access mode from unified to legacy controller */
                        r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
                        if (r < 0)
                                log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
                }
        }

        return 0;
}

struct access_callback_data {
        uid_t uid;
        gid_t gid;
        int error;
};

static int access_callback(
                RecurseDirEvent event,
                const char *path,
                int dir_fd,
                int inode_fd,
                const struct dirent *de,
                const struct statx *sx,
                void *userdata) {

        struct access_callback_data *d = ASSERT_PTR(userdata);

        if (!IN_SET(event, RECURSE_DIR_ENTER, RECURSE_DIR_ENTRY))
                return RECURSE_DIR_CONTINUE;

        assert(inode_fd >= 0);

        /* fchown() doesn't support O_PATH fds, hence we use the /proc/self/fd/ trick */
        if (chown(FORMAT_PROC_FD_PATH(inode_fd), d->uid, d->gid) < 0) {
                log_debug_errno(errno, "Failed to change ownership of '%s', ignoring: %m", ASSERT_PTR(path));

                if (d->error == 0) /* Return the first error to the caller */
                        d->error = errno;
        }

        return RECURSE_DIR_CONTINUE;
}

int cg_set_access_recursive(
                const char *controller,
                const char *path,
                uid_t uid,
                gid_t gid) {

        _cleanup_close_ int fd = -EBADF;
        _cleanup_free_ char *fs = NULL;
        int r;

        /* A recursive version of cg_set_access(). But note that this one changes ownership of *all* files,
         * not just the allowlist that cg_set_access() uses. Use cg_set_access() on the cgroup you want to
         * delegate, and cg_set_access_recursive() for any sub-cgroups you might want to create below it. */

        if (!uid_is_valid(uid) && !gid_is_valid(gid))
                return 0;

        r = cg_get_path(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        fd = open(fs, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
        if (fd < 0)
                return -errno;

        struct access_callback_data d = {
                .uid = uid,
                .gid = gid,
        };

        r = recurse_dir(fd,
                        fs,
                        /* statx_mask= */ 0,
                        /* n_depth_max= */ UINT_MAX,
                        RECURSE_DIR_SAME_MOUNT|RECURSE_DIR_INODE_FD|RECURSE_DIR_TOPLEVEL,
                        access_callback,
                        &d);
        if (r < 0)
                return r;

        return -d.error;
}
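
/* Editor's sketch (hypothetical usage, not upstream code): following the comment above, delegating a
 * subtree to an unprivileged user by fixing up the delegation root with cg_set_access() and everything
 * below it with cg_set_access_recursive(). The paths are made up for illustration. */
#if 0
static int example_delegate_subtree(uid_t uid, gid_t gid) {
        int r;

        r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, "/example.slice/example.service", uid, gid);
        if (r < 0)
                return r;

        return cg_set_access_recursive(SYSTEMD_CGROUP_CONTROLLER, "/example.slice/example.service/payload", uid, gid);
}
#endif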

int cg_migrate(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        bool done = false;
        _cleanup_set_free_ Set *s = NULL;
        int r, ret = 0;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        do {
                _cleanup_fclose_ FILE *f = NULL;
                pid_t pid;

                done = true;

                r = cg_enumerate_processes(cfrom, pfrom, &f);
                if (r < 0)
                        return RET_GATHER(ret, r);

                while ((r = cg_read_pid(f, &pid)) > 0) {
                        /* This might do weird stuff if we aren't a single-threaded program. However, we
                         * luckily know we are. */
                        if (FLAGS_SET(flags, CGROUP_IGNORE_SELF) && pid == getpid_cached())
                                continue;

                        if (set_contains(s, PID_TO_PTR(pid)))
                                continue;

                        /* Ignore kernel threads. Since they can only exist in the root cgroup, we only
                         * check for them there. */
                        if (cfrom && empty_or_root(pfrom) &&
                            pid_is_kernel_thread(pid) > 0)
                                continue;

                        r = cg_attach(cto, pto, pid);
                        if (r < 0) {
                                if (r != -ESRCH)
                                        RET_GATHER(ret, r);
                        } else if (ret == 0)
                                ret = 1;

                        done = false;

                        r = set_ensure_put(&s, /* hash_ops = */ NULL, PID_TO_PTR(pid));
                        if (r < 0)
                                return RET_GATHER(ret, r);
                }
                if (r < 0)
                        return RET_GATHER(ret, r);
        } while (!done);

        return ret;
}

int cg_migrate_recursive(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        _cleanup_closedir_ DIR *d = NULL;
        int r, ret = 0;
        char *fn;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        ret = cg_migrate(cfrom, pfrom, cto, pto, flags);

        r = cg_enumerate_subgroups(cfrom, pfrom, &d);
        if (r < 0) {
                if (ret >= 0 && r != -ENOENT)
                        return r;

                return ret;
        }

        while ((r = cg_read_subgroup(d, &fn)) > 0) {
                _cleanup_free_ char *p = NULL;

                p = path_join(empty_to_root(pfrom), fn);
                free(fn);
                if (!p)
                        return -ENOMEM;

                r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
                if (r != 0 && ret >= 0)
                        ret = r;
        }

        if (r < 0 && ret >= 0)
                ret = r;

        if (flags & CGROUP_REMOVE) {
                r = cg_rmdir(cfrom, pfrom);
                if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
                        return r;
        }

        return ret;
}
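
/* Editor's sketch (hypothetical usage, not upstream code): moving every process out of one cgroup
 * subtree into another within systemd's own hierarchy, removing the emptied source cgroups and
 * skipping the calling process itself. The paths are made up for illustration. */
#if 0
static int example_empty_subtree(void) {
        return cg_migrate_recursive(
                        SYSTEMD_CGROUP_CONTROLLER, "/example.slice/old",
                        SYSTEMD_CGROUP_CONTROLLER, "/example.slice/new",
                        CGROUP_REMOVE|CGROUP_IGNORE_SELF);
}
#endif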

int cg_migrate_recursive_fallback(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        int r;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
        if (r < 0) {
                char prefix[strlen(pto) + 1];

                /* This didn't work? Then let's try all prefixes of the destination */

                PATH_FOREACH_PREFIX(prefix, pto) {
                        int q;

                        q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
                        if (q >= 0)
                                return q;
                }
        }

        return r;
}

int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
        CGroupController c;
        CGroupMask done;
        bool created;
        int r;

        /* This one will create a cgroup in our private tree, but also
         * duplicate it in the trees specified in mask, and remove it
         * in all others.
         *
         * Returns 0 if the group already existed in the systemd hierarchy,
         * 1 on success, negative otherwise.
         */

        /* First create the cgroup in our own hierarchy. */
        r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
        if (r < 0)
                return r;
        created = r;

        /* If we are in the unified hierarchy, we are done now */
        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r > 0)
                return created;

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        done = 0;

        /* Otherwise, do the same in the other hierarchies */
        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *n;

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                n = cgroup_controller_to_string(c);
                if (FLAGS_SET(mask, bit))
                        (void) cg_create(n, path);

                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return created;
}

int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
        int r;

        r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
        if (r < 0)
                return r;

        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r > 0)
                return 0;

        supported &= CGROUP_MASK_V1;
        CGroupMask done = 0;

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *p = NULL;

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                if (path_callback)
                        p = path_callback(bit, userdata);
                if (!p)
                        p = path;

                (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return 0;
}

int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata) {
        CGroupController c;
        CGroupMask done;
        int r = 0, q;

        assert(to_callback);

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        done = 0;

        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *to = NULL;

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                if (!FLAGS_SET(mask, bit))
                        continue;

                to = to_callback(bit, userdata);

                /* Remember first error and try continuing */
                q = cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, from, cgroup_controller_to_string(c), to, 0);
                r = (r < 0) ? r : q;

                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return r;
}

int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
        int r, q;

        r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
        if (r < 0)
                return r;

        q = cg_all_unified();
        if (q < 0)
                return q;
        if (q > 0)
                return r;

        return cg_trim_v1_controllers(supported, _CGROUP_MASK_ALL, path, delete_root);
}

int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root) {
        CGroupController c;
        CGroupMask done;
        int r = 0, q;

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        done = 0;

        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                if (FLAGS_SET(mask, bit)) {
                        /* Remember first error and try continuing */
                        q = cg_trim(cgroup_controller_to_string(c), path, delete_root);
                        r = (r < 0) ? r : q;
                }
                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return r;
}

int cg_enable_everywhere(
                CGroupMask supported,
                CGroupMask mask,
                const char *p,
                CGroupMask *ret_result_mask) {

        _cleanup_fclose_ FILE *f = NULL;
        _cleanup_free_ char *fs = NULL;
        CGroupController c;
        CGroupMask ret = 0;
        int r;

        assert(p);

        if (supported == 0) {
                if (ret_result_mask)
                        *ret_result_mask = 0;
                return 0;
        }

        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r == 0) {
                /* On the legacy hierarchy there's no concept of "enabling" controllers for the cgroups defined. Let's
                 * claim complete success right away. (If you wonder why we return the full mask here, rather than zero:
                 * the caller tends to use the returned mask later on to compare if all controllers were properly joined,
                 * and if not requeues realization. This use is the primary purpose of the return value, hence let's
                 * minimize surprises here and reduce triggers for re-realization by always saying we fully
                 * succeeded.) */
                if (ret_result_mask)
                        *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
                                                                               * CGROUP_MASK_V2: The 'supported' mask
                                                                               * might contain pure-V1 or BPF
                                                                               * controllers, and we never want to
                                                                               * claim that we could enable those with
                                                                               * cgroup.subtree_control */
                return 0;
        }

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
        if (r < 0)
                return r;

        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *n;

                if (!FLAGS_SET(CGROUP_MASK_V2, bit))
                        continue;

                if (!FLAGS_SET(supported, bit))
                        continue;

                n = cgroup_controller_to_string(c);
                {
                        char s[1 + strlen(n) + 1];

                        s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
                        strcpy(s + 1, n);

                        if (!f) {
                                f = fopen(fs, "we");
                                if (!f)
                                        return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
                        }

                        r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
                        if (r < 0) {
                                log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
                                                FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
                                clearerr(f);

                                /* If we can't turn off a controller, leave it on in the reported resulting mask. This
                                 * happens for example when we attempt to turn off a controller up in the tree that is
                                 * used down in the tree. */
                                if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
                                                                           * only here, and not follow the same logic
                                                                           * for other errors such as EINVAL or
                                                                           * EOPNOTSUPP or anything else. That's
                                                                           * because EBUSY indicates that the
                                                                           * controller is currently enabled and
                                                                           * cannot be disabled because something down
                                                                           * the hierarchy is still using it. Any other
                                                                           * error most likely means something like "I
                                                                           * never heard of this controller" or
                                                                           * similar. In the former case it's hence
                                                                           * safe to assume the controller is still on
                                                                           * after the failed operation, while in the
                                                                           * latter case it's safer to assume the
                                                                           * controller is unknown and hence certainly
                                                                           * not enabled. */
                                        ret |= bit;
                        } else {
                                /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
                                if (FLAGS_SET(mask, bit))
                                        ret |= bit;
                        }
                }
        }

        /* Let's return the precise set of controllers now enabled for the cgroup. */
        if (ret_result_mask)
                *ret_result_mask = ret;

        return 0;
}
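
/* Editor's note (illustrative, not upstream code): on cgroup v2 the loop above enables or disables
 * controllers by writing "+name" or "-name" tokens to cgroup.subtree_control, e.g. hypothetically:
 *
 *     echo "+cpu" > /sys/fs/cgroup/example.slice/cgroup.subtree_control
 *     echo "-io"  > /sys/fs/cgroup/example.slice/cgroup.subtree_control
 *
 * As the comment above explains, such a write fails with EBUSY when a controller cannot be disabled
 * because a descendant cgroup still has it enabled, which is why that error is treated specially. */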