]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/cgroup-util.c
pkgconfig: define variables relative to ${prefix}/${rootprefix}/${sysconfdir}
[thirdparty/systemd.git] / src / basic / cgroup-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <dirent.h>
4 #include <errno.h>
5 #include <ftw.h>
6 #include <limits.h>
7 #include <signal.h>
8 #include <stddef.h>
9 #include <stdio_ext.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <sys/stat.h>
13 #include <sys/statfs.h>
14 #include <sys/types.h>
15 #include <sys/xattr.h>
16 #include <unistd.h>
17
18 #include "alloc-util.h"
19 #include "cgroup-util.h"
20 #include "def.h"
21 #include "dirent-util.h"
22 #include "extract-word.h"
23 #include "fd-util.h"
24 #include "fileio.h"
25 #include "format-util.h"
26 #include "fs-util.h"
27 #include "log.h"
28 #include "login-util.h"
29 #include "macro.h"
30 #include "missing.h"
31 #include "mkdir.h"
32 #include "parse-util.h"
33 #include "path-util.h"
34 #include "proc-cmdline.h"
35 #include "process-util.h"
36 #include "set.h"
37 #include "special.h"
38 #include "stat-util.h"
39 #include "stdio-util.h"
40 #include "string-table.h"
41 #include "string-util.h"
42 #include "strv.h"
43 #include "unit-name.h"
44 #include "user-util.h"
45
46 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
47 _cleanup_free_ char *fs = NULL;
48 FILE *f;
49 int r;
50
51 assert(_f);
52
53 r = cg_get_path(controller, path, "cgroup.procs", &fs);
54 if (r < 0)
55 return r;
56
57 f = fopen(fs, "re");
58 if (!f)
59 return -errno;
60
61 *_f = f;
62 return 0;
63 }
64
/* Reads the next PID from an open "cgroup.procs" stream.
 *
 * Returns 1 and stores the PID in *_pid if one was read, 0 on end of file, and a negative
 * errno-style value on failure (-EIO if the entry cannot be parsed, is zero, or does not fit
 * into a positive pid_t). *_pid is only written on success.
 *
 * Note that the cgroup.procs might contain duplicates! See cgroups.txt for details. */
int cg_read_pid(FILE *f, pid_t *_pid) {
        unsigned long ul;
        pid_t pid;

        assert(f);
        assert(_pid);

        errno = 0;
        if (fscanf(f, "%lu", &ul) != 1) {

                if (feof(f))
                        return 0;

                return errno > 0 ? -errno : -EIO;
        }

        /* Refuse zero as well as anything that does not survive the round trip through pid_t as a
         * positive value: a plain cast would silently truncate the number or make it negative. */
        pid = (pid_t) ul;
        if (pid <= 0 || (unsigned long) pid != ul)
                return -EIO;

        *_pid = pid;
        return 1;
}
89
90 int cg_read_event(
91 const char *controller,
92 const char *path,
93 const char *event,
94 char **val) {
95
96 _cleanup_free_ char *events = NULL, *content = NULL;
97 char *p, *line;
98 int r;
99
100 r = cg_get_path(controller, path, "cgroup.events", &events);
101 if (r < 0)
102 return r;
103
104 r = read_full_file(events, &content, NULL);
105 if (r < 0)
106 return r;
107
108 p = content;
109 while ((line = strsep(&p, "\n"))) {
110 char *key;
111
112 key = strsep(&line, " ");
113 if (!key || !line)
114 return -EINVAL;
115
116 if (strcmp(key, event))
117 continue;
118
119 *val = strdup(line);
120 return 0;
121 }
122
123 return -ENOENT;
124 }
125
126 bool cg_ns_supported(void) {
127 static thread_local int enabled = -1;
128
129 if (enabled >= 0)
130 return enabled;
131
132 if (access("/proc/self/ns/cgroup", F_OK) < 0) {
133 if (errno != ENOENT)
134 log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
135 enabled = false;
136 } else
137 enabled = true;
138
139 return enabled;
140 }
141
142 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
143 _cleanup_free_ char *fs = NULL;
144 int r;
145 DIR *d;
146
147 assert(_d);
148
149 /* This is not recursive! */
150
151 r = cg_get_path(controller, path, NULL, &fs);
152 if (r < 0)
153 return r;
154
155 d = opendir(fs);
156 if (!d)
157 return -errno;
158
159 *_d = d;
160 return 0;
161 }
162
163 int cg_read_subgroup(DIR *d, char **fn) {
164 struct dirent *de;
165
166 assert(d);
167 assert(fn);
168
169 FOREACH_DIRENT_ALL(de, d, return -errno) {
170 char *b;
171
172 if (de->d_type != DT_DIR)
173 continue;
174
175 if (dot_or_dot_dot(de->d_name))
176 continue;
177
178 b = strdup(de->d_name);
179 if (!b)
180 return -ENOMEM;
181
182 *fn = b;
183 return 1;
184 }
185
186 return 0;
187 }
188
189 int cg_rmdir(const char *controller, const char *path) {
190 _cleanup_free_ char *p = NULL;
191 int r;
192
193 r = cg_get_path(controller, path, NULL, &p);
194 if (r < 0)
195 return r;
196
197 r = rmdir(p);
198 if (r < 0 && errno != ENOENT)
199 return -errno;
200
201 r = cg_hybrid_unified();
202 if (r <= 0)
203 return r;
204
205 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
206 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
207 if (r < 0)
208 log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
209 }
210
211 return 0;
212 }
213
214 int cg_kill(
215 const char *controller,
216 const char *path,
217 int sig,
218 CGroupFlags flags,
219 Set *s,
220 cg_kill_log_func_t log_kill,
221 void *userdata) {
222
223 _cleanup_set_free_ Set *allocated_set = NULL;
224 bool done = false;
225 int r, ret = 0;
226 pid_t my_pid;
227
228 assert(sig >= 0);
229
230 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
231 * SIGCONT on SIGKILL. */
232 if (IN_SET(sig, SIGCONT, SIGKILL))
233 flags &= ~CGROUP_SIGCONT;
234
235 /* This goes through the tasks list and kills them all. This
236 * is repeated until no further processes are added to the
237 * tasks list, to properly handle forking processes */
238
239 if (!s) {
240 s = allocated_set = set_new(NULL);
241 if (!s)
242 return -ENOMEM;
243 }
244
245 my_pid = getpid_cached();
246
247 do {
248 _cleanup_fclose_ FILE *f = NULL;
249 pid_t pid = 0;
250 done = true;
251
252 r = cg_enumerate_processes(controller, path, &f);
253 if (r < 0) {
254 if (ret >= 0 && r != -ENOENT)
255 return r;
256
257 return ret;
258 }
259
260 while ((r = cg_read_pid(f, &pid)) > 0) {
261
262 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
263 continue;
264
265 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
266 continue;
267
268 if (log_kill)
269 log_kill(pid, sig, userdata);
270
271 /* If we haven't killed this process yet, kill
272 * it */
273 if (kill(pid, sig) < 0) {
274 if (ret >= 0 && errno != ESRCH)
275 ret = -errno;
276 } else {
277 if (flags & CGROUP_SIGCONT)
278 (void) kill(pid, SIGCONT);
279
280 if (ret == 0)
281 ret = 1;
282 }
283
284 done = false;
285
286 r = set_put(s, PID_TO_PTR(pid));
287 if (r < 0) {
288 if (ret >= 0)
289 return r;
290
291 return ret;
292 }
293 }
294
295 if (r < 0) {
296 if (ret >= 0)
297 return r;
298
299 return ret;
300 }
301
302 /* To avoid racing against processes which fork
303 * quicker than we can kill them we repeat this until
304 * no new pids need to be killed. */
305
306 } while (!done);
307
308 return ret;
309 }
310
311 int cg_kill_recursive(
312 const char *controller,
313 const char *path,
314 int sig,
315 CGroupFlags flags,
316 Set *s,
317 cg_kill_log_func_t log_kill,
318 void *userdata) {
319
320 _cleanup_set_free_ Set *allocated_set = NULL;
321 _cleanup_closedir_ DIR *d = NULL;
322 int r, ret;
323 char *fn;
324
325 assert(path);
326 assert(sig >= 0);
327
328 if (!s) {
329 s = allocated_set = set_new(NULL);
330 if (!s)
331 return -ENOMEM;
332 }
333
334 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
335
336 r = cg_enumerate_subgroups(controller, path, &d);
337 if (r < 0) {
338 if (ret >= 0 && r != -ENOENT)
339 return r;
340
341 return ret;
342 }
343
344 while ((r = cg_read_subgroup(d, &fn)) > 0) {
345 _cleanup_free_ char *p = NULL;
346
347 p = strjoin(path, "/", fn);
348 free(fn);
349 if (!p)
350 return -ENOMEM;
351
352 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
353 if (r != 0 && ret >= 0)
354 ret = r;
355 }
356 if (ret >= 0 && r < 0)
357 ret = r;
358
359 if (flags & CGROUP_REMOVE) {
360 r = cg_rmdir(controller, path);
361 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
362 return r;
363 }
364
365 return ret;
366 }
367
368 int cg_migrate(
369 const char *cfrom,
370 const char *pfrom,
371 const char *cto,
372 const char *pto,
373 CGroupFlags flags) {
374
375 bool done = false;
376 _cleanup_set_free_ Set *s = NULL;
377 int r, ret = 0;
378 pid_t my_pid;
379
380 assert(cfrom);
381 assert(pfrom);
382 assert(cto);
383 assert(pto);
384
385 s = set_new(NULL);
386 if (!s)
387 return -ENOMEM;
388
389 my_pid = getpid_cached();
390
391 do {
392 _cleanup_fclose_ FILE *f = NULL;
393 pid_t pid = 0;
394 done = true;
395
396 r = cg_enumerate_processes(cfrom, pfrom, &f);
397 if (r < 0) {
398 if (ret >= 0 && r != -ENOENT)
399 return r;
400
401 return ret;
402 }
403
404 while ((r = cg_read_pid(f, &pid)) > 0) {
405
406 /* This might do weird stuff if we aren't a
407 * single-threaded program. However, we
408 * luckily know we are not */
409 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
410 continue;
411
412 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
413 continue;
414
415 /* Ignore kernel threads. Since they can only
416 * exist in the root cgroup, we only check for
417 * them there. */
418 if (cfrom &&
419 empty_or_root(pfrom) &&
420 is_kernel_thread(pid) > 0)
421 continue;
422
423 r = cg_attach(cto, pto, pid);
424 if (r < 0) {
425 if (ret >= 0 && r != -ESRCH)
426 ret = r;
427 } else if (ret == 0)
428 ret = 1;
429
430 done = false;
431
432 r = set_put(s, PID_TO_PTR(pid));
433 if (r < 0) {
434 if (ret >= 0)
435 return r;
436
437 return ret;
438 }
439 }
440
441 if (r < 0) {
442 if (ret >= 0)
443 return r;
444
445 return ret;
446 }
447 } while (!done);
448
449 return ret;
450 }
451
452 int cg_migrate_recursive(
453 const char *cfrom,
454 const char *pfrom,
455 const char *cto,
456 const char *pto,
457 CGroupFlags flags) {
458
459 _cleanup_closedir_ DIR *d = NULL;
460 int r, ret = 0;
461 char *fn;
462
463 assert(cfrom);
464 assert(pfrom);
465 assert(cto);
466 assert(pto);
467
468 ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
469
470 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
471 if (r < 0) {
472 if (ret >= 0 && r != -ENOENT)
473 return r;
474
475 return ret;
476 }
477
478 while ((r = cg_read_subgroup(d, &fn)) > 0) {
479 _cleanup_free_ char *p = NULL;
480
481 p = strjoin(pfrom, "/", fn);
482 free(fn);
483 if (!p)
484 return -ENOMEM;
485
486 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
487 if (r != 0 && ret >= 0)
488 ret = r;
489 }
490
491 if (r < 0 && ret >= 0)
492 ret = r;
493
494 if (flags & CGROUP_REMOVE) {
495 r = cg_rmdir(cfrom, pfrom);
496 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
497 return r;
498 }
499
500 return ret;
501 }
502
503 int cg_migrate_recursive_fallback(
504 const char *cfrom,
505 const char *pfrom,
506 const char *cto,
507 const char *pto,
508 CGroupFlags flags) {
509
510 int r;
511
512 assert(cfrom);
513 assert(pfrom);
514 assert(cto);
515 assert(pto);
516
517 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
518 if (r < 0) {
519 char prefix[strlen(pto) + 1];
520
521 /* This didn't work? Then let's try all prefixes of the destination */
522
523 PATH_FOREACH_PREFIX(prefix, pto) {
524 int q;
525
526 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
527 if (q >= 0)
528 return q;
529 }
530 }
531
532 return r;
533 }
534
535 static const char *controller_to_dirname(const char *controller) {
536 const char *e;
537
538 assert(controller);
539
540 /* Converts a controller name to the directory name below
541 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
542 * just cuts off the name= prefixed used for named
543 * hierarchies, if it is specified. */
544
545 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
546 if (cg_hybrid_unified() > 0)
547 controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
548 else
549 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
550 }
551
552 e = startswith(controller, "name=");
553 if (e)
554 return e;
555
556 return controller;
557 }
558
/* Builds "/sys/fs/cgroup/<controller dir>[/<path>][/<suffix>]" for the legacy layout, leaving out
 * the parts that are empty. The newly allocated string is stored in *fs. Returns 0 on success
 * and -ENOMEM if allocation fails. */
static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
        const char *dirname;
        bool no_path, no_suffix;
        char *joined;

        assert(fs);
        assert(controller);

        dirname = controller_to_dirname(controller);
        no_path = isempty(path);
        no_suffix = isempty(suffix);

        if (no_path)
                joined = no_suffix ? strappend("/sys/fs/cgroup/", dirname)
                                   : strjoin("/sys/fs/cgroup/", dirname, "/", suffix);
        else
                joined = no_suffix ? strjoin("/sys/fs/cgroup/", dirname, "/", path)
                                   : strjoin("/sys/fs/cgroup/", dirname, "/", path, "/", suffix);
        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
582
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]" for the unified layout, leaving out the parts that
 * are empty. The newly allocated string is stored in *fs. Returns 0 on success and -ENOMEM if
 * allocation fails. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        bool no_path, no_suffix;
        char *joined;

        assert(fs);

        no_path = isempty(path);
        no_suffix = isempty(suffix);

        if (no_path)
                joined = no_suffix ? strdup("/sys/fs/cgroup")
                                   : strappend("/sys/fs/cgroup/", suffix);
        else
                joined = no_suffix ? strappend("/sys/fs/cgroup/", path)
                                   : strjoin("/sys/fs/cgroup/", path, "/", suffix);
        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
602
/* Computes the file system path for a cgroup, optionally with an attribute name (suffix)
 * appended, and stores the newly allocated, simplified string in *fs.
 *
 * Without a controller only path and suffix are joined, with no prefix at all; at least one of
 * the two must then be given. With a controller the result lies below /sys/fs/cgroup, laid out
 * according to cg_all_unified().
 *
 * Returns 0 on success, -EINVAL for an invalid controller or when nothing was specified, -ENOMEM
 * on allocation failure, or the error reported by cg_all_unified(). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int unified, q;

        assert(fs);

        if (!controller) {
                char *joined;

                /* If no controller is specified, we return the path *below* the controllers,
                 * without any prefix. */

                if (path && suffix)
                        joined = strjoin(path, "/", suffix);
                else if (path)
                        joined = strdup(path);
                else if (suffix)
                        joined = strdup(suffix);
                else
                        return -EINVAL;
                if (!joined)
                        return -ENOMEM;

                *fs = path_simplify(joined, false);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        unified = cg_all_unified();
        if (unified < 0)
                return unified;

        q = unified > 0 ? join_path_unified(path, suffix, fs)
                        : join_path_legacy(controller, path, suffix, fs);
        if (q < 0)
                return q;

        path_simplify(*fs, false);
        return 0;
}
646
647 static int controller_is_accessible(const char *controller) {
648 int r;
649
650 assert(controller);
651
652 /* Checks whether a specific controller is accessible,
653 * i.e. its hierarchy mounted. In the unified hierarchy all
654 * controllers are considered accessible, except for the named
655 * hierarchies */
656
657 if (!cg_controller_is_valid(controller))
658 return -EINVAL;
659
660 r = cg_all_unified();
661 if (r < 0)
662 return r;
663 if (r > 0) {
664 /* We don't support named hierarchies if we are using
665 * the unified hierarchy. */
666
667 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
668 return 0;
669
670 if (startswith(controller, "name="))
671 return -EOPNOTSUPP;
672
673 } else {
674 const char *cc, *dn;
675
676 dn = controller_to_dirname(controller);
677 cc = strjoina("/sys/fs/cgroup/", dn);
678
679 if (laccess(cc, F_OK) < 0)
680 return -errno;
681 }
682
683 return 0;
684 }
685
/* Same as cg_get_path(), but first verifies through controller_is_accessible() that the
 * controller can be used at all. Unlike cg_get_path(), a controller is mandatory here. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        /* Check if the specified controller is actually accessible */
        accessible = controller_is_accessible(controller);

        return accessible < 0 ? accessible : cg_get_path(controller, path, suffix, fs);
}
699
700 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
701 assert(path);
702 assert(sb);
703 assert(ftwbuf);
704
705 if (typeflag != FTW_DP)
706 return 0;
707
708 if (ftwbuf->level < 1)
709 return 0;
710
711 (void) rmdir(path);
712 return 0;
713 }
714
715 int cg_trim(const char *controller, const char *path, bool delete_root) {
716 _cleanup_free_ char *fs = NULL;
717 int r = 0, q;
718
719 assert(path);
720
721 r = cg_get_path(controller, path, NULL, &fs);
722 if (r < 0)
723 return r;
724
725 errno = 0;
726 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
727 if (errno == ENOENT)
728 r = 0;
729 else if (errno > 0)
730 r = -errno;
731 else
732 r = -EIO;
733 }
734
735 if (delete_root) {
736 if (rmdir(fs) < 0 && errno != ENOENT)
737 return -errno;
738 }
739
740 q = cg_hybrid_unified();
741 if (q < 0)
742 return q;
743 if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
744 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
745 if (q < 0)
746 log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
747 }
748
749 return r;
750 }
751
752 /* Create a cgroup in the hierarchy of controller.
753 * Returns 0 if the group already existed, 1 on success, negative otherwise.
754 */
755 int cg_create(const char *controller, const char *path) {
756 _cleanup_free_ char *fs = NULL;
757 int r;
758
759 r = cg_get_path_and_check(controller, path, NULL, &fs);
760 if (r < 0)
761 return r;
762
763 r = mkdir_parents(fs, 0755);
764 if (r < 0)
765 return r;
766
767 r = mkdir_errno_wrapper(fs, 0755);
768 if (r == -EEXIST)
769 return 0;
770 if (r < 0)
771 return r;
772
773 r = cg_hybrid_unified();
774 if (r < 0)
775 return r;
776
777 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
778 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
779 if (r < 0)
780 log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
781 }
782
783 return 1;
784 }
785
/* Creates the cgroup and then attaches pid to it. On success the result of cg_create() is passed
 * through (0 if the group already existed, 1 if it was created); otherwise the error of the
 * failing step is returned. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);

        /* This does not remove the cgroup on failure */
        return attached < 0 ? attached : created;
}
802
803 int cg_attach(const char *controller, const char *path, pid_t pid) {
804 _cleanup_free_ char *fs = NULL;
805 char c[DECIMAL_STR_MAX(pid_t) + 2];
806 int r;
807
808 assert(path);
809 assert(pid >= 0);
810
811 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
812 if (r < 0)
813 return r;
814
815 if (pid == 0)
816 pid = getpid_cached();
817
818 xsprintf(c, PID_FMT "\n", pid);
819
820 r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
821 if (r < 0)
822 return r;
823
824 r = cg_hybrid_unified();
825 if (r < 0)
826 return r;
827
828 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
829 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
830 if (r < 0)
831 log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
832 }
833
834 return 0;
835 }
836
837 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
838 int r;
839
840 assert(controller);
841 assert(path);
842 assert(pid >= 0);
843
844 r = cg_attach(controller, path, pid);
845 if (r < 0) {
846 char prefix[strlen(path) + 1];
847
848 /* This didn't work? Then let's try all prefixes of
849 * the destination */
850
851 PATH_FOREACH_PREFIX(prefix, path) {
852 int q;
853
854 q = cg_attach(controller, prefix, pid);
855 if (q >= 0)
856 return q;
857 }
858 }
859
860 return r;
861 }
862
863 int cg_set_access(
864 const char *controller,
865 const char *path,
866 uid_t uid,
867 gid_t gid) {
868
869 struct Attribute {
870 const char *name;
871 bool fatal;
872 };
873
874 /* cgroupsv1, aka legacy/non-unified */
875 static const struct Attribute legacy_attributes[] = {
876 { "cgroup.procs", true },
877 { "tasks", false },
878 { "cgroup.clone_children", false },
879 {},
880 };
881
882 /* cgroupsv2, aka unified */
883 static const struct Attribute unified_attributes[] = {
884 { "cgroup.procs", true },
885 { "cgroup.subtree_control", true },
886 { "cgroup.threads", false },
887 {},
888 };
889
890 static const struct Attribute* const attributes[] = {
891 [false] = legacy_attributes,
892 [true] = unified_attributes,
893 };
894
895 _cleanup_free_ char *fs = NULL;
896 const struct Attribute *i;
897 int r, unified;
898
899 assert(path);
900
901 if (uid == UID_INVALID && gid == GID_INVALID)
902 return 0;
903
904 unified = cg_unified_controller(controller);
905 if (unified < 0)
906 return unified;
907
908 /* Configure access to the cgroup itself */
909 r = cg_get_path(controller, path, NULL, &fs);
910 if (r < 0)
911 return r;
912
913 r = chmod_and_chown(fs, 0755, uid, gid);
914 if (r < 0)
915 return r;
916
917 /* Configure access to the cgroup's attributes */
918 for (i = attributes[unified]; i->name; i++) {
919 fs = mfree(fs);
920
921 r = cg_get_path(controller, path, i->name, &fs);
922 if (r < 0)
923 return r;
924
925 r = chmod_and_chown(fs, 0644, uid, gid);
926 if (r < 0) {
927 if (i->fatal)
928 return r;
929
930 log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
931 }
932 }
933
934 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
935 r = cg_hybrid_unified();
936 if (r < 0)
937 return r;
938 if (r > 0) {
939 /* Always propagate access mode from unified to legacy controller */
940 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
941 if (r < 0)
942 log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
943 }
944 }
945
946 return 0;
947 }
948
949 int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
950 _cleanup_free_ char *fs = NULL;
951 int r;
952
953 assert(path);
954 assert(name);
955 assert(value || size <= 0);
956
957 r = cg_get_path(controller, path, NULL, &fs);
958 if (r < 0)
959 return r;
960
961 if (setxattr(fs, name, value, size, flags) < 0)
962 return -errno;
963
964 return 0;
965 }
966
967 int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
968 _cleanup_free_ char *fs = NULL;
969 ssize_t n;
970 int r;
971
972 assert(path);
973 assert(name);
974
975 r = cg_get_path(controller, path, NULL, &fs);
976 if (r < 0)
977 return r;
978
979 n = getxattr(fs, name, value, size);
980 if (n < 0)
981 return -errno;
982
983 return (int) n;
984 }
985
986 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
987 _cleanup_fclose_ FILE *f = NULL;
988 const char *fs, *controller_str;
989 int unified, r;
990 size_t cs = 0;
991
992 assert(path);
993 assert(pid >= 0);
994
995 if (controller) {
996 if (!cg_controller_is_valid(controller))
997 return -EINVAL;
998 } else
999 controller = SYSTEMD_CGROUP_CONTROLLER;
1000
1001 unified = cg_unified_controller(controller);
1002 if (unified < 0)
1003 return unified;
1004 if (unified == 0) {
1005 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
1006 controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
1007 else
1008 controller_str = controller;
1009
1010 cs = strlen(controller_str);
1011 }
1012
1013 fs = procfs_file_alloca(pid, "cgroup");
1014 f = fopen(fs, "re");
1015 if (!f)
1016 return errno == ENOENT ? -ESRCH : -errno;
1017
1018 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
1019
1020 for (;;) {
1021 _cleanup_free_ char *line = NULL;
1022 char *e, *p;
1023
1024 r = read_line(f, LONG_LINE_MAX, &line);
1025 if (r < 0)
1026 return r;
1027 if (r == 0)
1028 break;
1029
1030 if (unified) {
1031 e = startswith(line, "0:");
1032 if (!e)
1033 continue;
1034
1035 e = strchr(e, ':');
1036 if (!e)
1037 continue;
1038 } else {
1039 char *l;
1040 size_t k;
1041 const char *word, *state;
1042 bool found = false;
1043
1044 l = strchr(line, ':');
1045 if (!l)
1046 continue;
1047
1048 l++;
1049 e = strchr(l, ':');
1050 if (!e)
1051 continue;
1052
1053 *e = 0;
1054 FOREACH_WORD_SEPARATOR(word, k, l, ",", state)
1055 if (k == cs && memcmp(word, controller_str, cs) == 0) {
1056 found = true;
1057 break;
1058 }
1059 if (!found)
1060 continue;
1061 }
1062
1063 p = strdup(e + 1);
1064 if (!p)
1065 return -ENOMEM;
1066
1067 /* Truncate suffix indicating the process is a zombie */
1068 e = endswith(p, " (deleted)");
1069 if (e)
1070 *e = 0;
1071
1072 *path = p;
1073 return 0;
1074 }
1075
1076 return -ENODATA;
1077 }
1078
1079 int cg_install_release_agent(const char *controller, const char *agent) {
1080 _cleanup_free_ char *fs = NULL, *contents = NULL;
1081 const char *sc;
1082 int r;
1083
1084 assert(agent);
1085
1086 r = cg_unified_controller(controller);
1087 if (r < 0)
1088 return r;
1089 if (r > 0) /* doesn't apply to unified hierarchy */
1090 return -EOPNOTSUPP;
1091
1092 r = cg_get_path(controller, NULL, "release_agent", &fs);
1093 if (r < 0)
1094 return r;
1095
1096 r = read_one_line_file(fs, &contents);
1097 if (r < 0)
1098 return r;
1099
1100 sc = strstrip(contents);
1101 if (isempty(sc)) {
1102 r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
1103 if (r < 0)
1104 return r;
1105 } else if (!path_equal(sc, agent))
1106 return -EEXIST;
1107
1108 fs = mfree(fs);
1109 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1110 if (r < 0)
1111 return r;
1112
1113 contents = mfree(contents);
1114 r = read_one_line_file(fs, &contents);
1115 if (r < 0)
1116 return r;
1117
1118 sc = strstrip(contents);
1119 if (streq(sc, "0")) {
1120 r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
1121 if (r < 0)
1122 return r;
1123
1124 return 1;
1125 }
1126
1127 if (!streq(sc, "1"))
1128 return -EIO;
1129
1130 return 0;
1131 }
1132
1133 int cg_uninstall_release_agent(const char *controller) {
1134 _cleanup_free_ char *fs = NULL;
1135 int r;
1136
1137 r = cg_unified_controller(controller);
1138 if (r < 0)
1139 return r;
1140 if (r > 0) /* Doesn't apply to unified hierarchy */
1141 return -EOPNOTSUPP;
1142
1143 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1144 if (r < 0)
1145 return r;
1146
1147 r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
1148 if (r < 0)
1149 return r;
1150
1151 fs = mfree(fs);
1152
1153 r = cg_get_path(controller, NULL, "release_agent", &fs);
1154 if (r < 0)
1155 return r;
1156
1157 r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER);
1158 if (r < 0)
1159 return r;
1160
1161 return 0;
1162 }
1163
1164 int cg_is_empty(const char *controller, const char *path) {
1165 _cleanup_fclose_ FILE *f = NULL;
1166 pid_t pid;
1167 int r;
1168
1169 assert(path);
1170
1171 r = cg_enumerate_processes(controller, path, &f);
1172 if (r == -ENOENT)
1173 return true;
1174 if (r < 0)
1175 return r;
1176
1177 r = cg_read_pid(f, &pid);
1178 if (r < 0)
1179 return r;
1180
1181 return r == 0;
1182 }
1183
1184 int cg_is_empty_recursive(const char *controller, const char *path) {
1185 int r;
1186
1187 assert(path);
1188
1189 /* The root cgroup is always populated */
1190 if (controller && empty_or_root(path))
1191 return false;
1192
1193 r = cg_unified_controller(controller);
1194 if (r < 0)
1195 return r;
1196 if (r > 0) {
1197 _cleanup_free_ char *t = NULL;
1198
1199 /* On the unified hierarchy we can check empty state
1200 * via the "populated" attribute of "cgroup.events". */
1201
1202 r = cg_read_event(controller, path, "populated", &t);
1203 if (r == -ENOENT)
1204 return true;
1205 if (r < 0)
1206 return r;
1207
1208 return streq(t, "0");
1209 } else {
1210 _cleanup_closedir_ DIR *d = NULL;
1211 char *fn;
1212
1213 r = cg_is_empty(controller, path);
1214 if (r <= 0)
1215 return r;
1216
1217 r = cg_enumerate_subgroups(controller, path, &d);
1218 if (r == -ENOENT)
1219 return true;
1220 if (r < 0)
1221 return r;
1222
1223 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1224 _cleanup_free_ char *p = NULL;
1225
1226 p = strjoin(path, "/", fn);
1227 free(fn);
1228 if (!p)
1229 return -ENOMEM;
1230
1231 r = cg_is_empty_recursive(controller, p);
1232 if (r <= 0)
1233 return r;
1234 }
1235 if (r < 0)
1236 return r;
1237
1238 return true;
1239 }
1240 }
1241
/* Splits a cgroup specification into controller and path. Three forms are understood:
 *
 *   "/some/path"       → no controller, only a path
 *   "controller"       → only a controller, no path
 *   "controller:/path" → both; the path may be empty, but must otherwise be absolute
 *
 * Either output pointer may be NULL if the caller is not interested in that part. A part that is
 * missing from the spec is returned as NULL, the others as newly allocated strings. Returns 0
 * on success, -EINVAL if the spec is malformed, -ENOMEM on allocation failure. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *ctl = NULL, *cgpath = NULL;
        const char *colon;
        int r;

        assert(spec);

        if (spec[0] == '/') {
                /* Path only */
                if (!path_is_normalized(spec))
                        return -EINVAL;

                if (path) {
                        cgpath = strdup(spec);
                        if (!cgpath)
                                return -ENOMEM;

                        *path = path_simplify(cgpath, false);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        colon = strchr(spec, ':');
        if (!colon) {
                /* Controller only */
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        ctl = strdup(spec);
                        if (!ctl)
                                return -ENOMEM;

                        *controller = ctl;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Controller and path, separated by a colon */
        ctl = strndup(spec, colon - spec);
        if (!ctl)
                return -ENOMEM;
        if (!cg_controller_is_valid(ctl)) {
                r = -EINVAL;
                goto fail;
        }

        if (!isempty(colon + 1)) {
                cgpath = strdup(colon + 1);
                if (!cgpath) {
                        r = -ENOMEM;
                        goto fail;
                }

                if (!path_is_normalized(cgpath) || !path_is_absolute(cgpath)) {
                        r = -EINVAL;
                        goto fail;
                }

                path_simplify(cgpath, false);
        }

        /* Hand over what the caller asked for, drop the rest */
        if (controller)
                *controller = ctl;
        else
                free(ctl);

        if (path)
                *path = cgpath;
        else
                free(cgpath);

        return 0;

fail:
        free(cgpath);
        free(ctl);
        return r;
}
1324
1325 int cg_mangle_path(const char *path, char **result) {
1326 _cleanup_free_ char *c = NULL, *p = NULL;
1327 char *t;
1328 int r;
1329
1330 assert(path);
1331 assert(result);
1332
1333 /* First, check if it already is a filesystem path */
1334 if (path_startswith(path, "/sys/fs/cgroup")) {
1335
1336 t = strdup(path);
1337 if (!t)
1338 return -ENOMEM;
1339
1340 *result = path_simplify(t, false);
1341 return 0;
1342 }
1343
1344 /* Otherwise, treat it as cg spec */
1345 r = cg_split_spec(path, &c, &p);
1346 if (r < 0)
1347 return r;
1348
1349 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1350 }
1351
1352 int cg_get_root_path(char **path) {
1353 char *p, *e;
1354 int r;
1355
1356 assert(path);
1357
1358 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1359 if (r < 0)
1360 return r;
1361
1362 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1363 if (!e)
1364 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1365 if (!e)
1366 e = endswith(p, "/system"); /* even more legacy */
1367 if (e)
1368 *e = 0;
1369
1370 *path = p;
1371 return 0;
1372 }
1373
1374 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1375 _cleanup_free_ char *rt = NULL;
1376 char *p;
1377 int r;
1378
1379 assert(cgroup);
1380 assert(shifted);
1381
1382 if (!root) {
1383 /* If the root was specified let's use that, otherwise
1384 * let's determine it from PID 1 */
1385
1386 r = cg_get_root_path(&rt);
1387 if (r < 0)
1388 return r;
1389
1390 root = rt;
1391 }
1392
1393 p = path_startswith(cgroup, root);
1394 if (p && p > cgroup)
1395 *shifted = p - 1;
1396 else
1397 *shifted = cgroup;
1398
1399 return 0;
1400 }
1401
1402 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1403 _cleanup_free_ char *raw = NULL;
1404 const char *c;
1405 int r;
1406
1407 assert(pid >= 0);
1408 assert(cgroup);
1409
1410 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1411 if (r < 0)
1412 return r;
1413
1414 r = cg_shift_path(raw, root, &c);
1415 if (r < 0)
1416 return r;
1417
1418 if (c == raw)
1419 *cgroup = TAKE_PTR(raw);
1420 else {
1421 char *n;
1422
1423 n = strdup(c);
1424 if (!n)
1425 return -ENOMEM;
1426
1427 *cgroup = n;
1428 }
1429
1430 return 0;
1431 }
1432
1433 int cg_path_decode_unit(const char *cgroup, char **unit) {
1434 char *c, *s;
1435 size_t n;
1436
1437 assert(cgroup);
1438 assert(unit);
1439
1440 n = strcspn(cgroup, "/");
1441 if (n < 3)
1442 return -ENXIO;
1443
1444 c = strndupa(cgroup, n);
1445 c = cg_unescape(c);
1446
1447 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1448 return -ENXIO;
1449
1450 s = strdup(c);
1451 if (!s)
1452 return -ENOMEM;
1453
1454 *unit = s;
1455 return 0;
1456 }
1457
1458 static bool valid_slice_name(const char *p, size_t n) {
1459
1460 if (!p)
1461 return false;
1462
1463 if (n < STRLEN("x.slice"))
1464 return false;
1465
1466 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1467 char buf[n+1], *c;
1468
1469 memcpy(buf, p, n);
1470 buf[n] = 0;
1471
1472 c = cg_unescape(buf);
1473
1474 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1475 }
1476
1477 return false;
1478 }
1479
/* Skips over all slice assignments at the beginning of a cgroup path. Returns a pointer to the
 * first path component (leading slashes already skipped) that valid_slice_name() does not
 * accept. */
static const char *skip_slices(const char *p) {
        size_t len;

        assert(p);

        p += strspn(p, "/");
        len = strcspn(p, "/");

        while (valid_slice_name(p, len)) {
                /* Step over this slice and the slashes following it */
                p += len;
                p += strspn(p, "/");
                len = strcspn(p, "/");
        }

        return p;
}
1497
/* Determines the unit a cgroup path belongs to: all leading slice components are skipped and the
 * component after them is decoded as a unit name. On success 0 is returned and a newly
 * allocated string is stored in *ret. Returns -ENXIO if that component is not a unit name or
 * ends in ".slice", or the error of cg_path_decode_unit(). */
int cg_path_get_unit(const char *path, char **ret) {
        const char *after_slices;
        char *name;
        int q;

        assert(path);
        assert(ret);

        after_slices = skip_slices(path);

        q = cg_path_decode_unit(after_slices, &name);
        if (q < 0)
                return q;

        /* We skipped over the slices, don't accept any now */
        if (!endswith(name, ".slice")) {
                *ret = name;
                return 0;
        }

        free(name);
        return -ENXIO;
}
1521
1522 int cg_pid_get_unit(pid_t pid, char **unit) {
1523 _cleanup_free_ char *cgroup = NULL;
1524 int r;
1525
1526 assert(unit);
1527
1528 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1529 if (r < 0)
1530 return r;
1531
1532 return cg_path_get_unit(cgroup, unit);
1533 }
1534
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to the path component following the session scope (with separating slashes
 * skipped), or NULL if 'p' is empty or its first component is not a "session-<id>.scope" name with an
 * id accepted by session_id_valid().
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the id between the 8 character prefix and the 6 character suffix, plus NUL. */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence report failure as NULL rather than as the
                 * boolean 'false'. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1571
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to whatever follows the "user@<uid>.service" component (separating slashes
 * skipped), or NULL if 'p' is empty or does not start with such a component.
 */
static const char *skip_user_manager(const char *p) {
        size_t len, uid_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < STRLEN("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;

        /* What remains between the 5 character prefix and the 8 character suffix is the UID. */
        uid_len = len - 5 - 8;

        {
                char uid_text[uid_len + 1];

                memcpy(uid_text, p + 5, uid_len);
                uid_text[uid_len] = 0;

                /* User manager service names never need unescaping, as they cannot clash with the
                 * kernel's own names; hence no cg_unescape() here. */

                if (parse_uid(uid_text, NULL) < 0)
                        return NULL;
        }

        p += len;
        p += strspn(p, "/");

        return p;
}
1609
/* Skips the part of a cgroup path that leads to a user's own subtree: any leading slices, followed by
 * either a user manager service or a session scope. Returns NULL if neither of the two is present. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* First get rid of the slices, if there are any */
        after_slices = skip_slices(path);

        /* Then prefer the user manager service; if that is not in the path, fall back to a session scope */
        after_manager = skip_user_manager(after_slices);
        if (!after_manager)
                return skip_session(after_slices);

        return after_manager;
}
1626
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO if the path does not contain a
 * user manager service or session scope prefix. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *user_part;

        assert(path);
        assert(ret);

        user_part = skip_user_prefix(path);
        if (!user_part)
                return -ENXIO;

        /* Below the user prefix the layout matches that of system units, so the same parser is
         * reused for the remainder. */
        return cg_path_get_unit(user_part, ret);
}
1642
1643 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1644 _cleanup_free_ char *cgroup = NULL;
1645 int r;
1646
1647 assert(unit);
1648
1649 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1650 if (r < 0)
1651 return r;
1652
1653 return cg_path_get_user_unit(cgroup, unit);
1654 }
1655
1656 int cg_path_get_machine_name(const char *path, char **machine) {
1657 _cleanup_free_ char *u = NULL;
1658 const char *sl;
1659 int r;
1660
1661 r = cg_path_get_unit(path, &u);
1662 if (r < 0)
1663 return r;
1664
1665 sl = strjoina("/run/systemd/machines/unit:", u);
1666 return readlink_malloc(sl, machine);
1667 }
1668
1669 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1670 _cleanup_free_ char *cgroup = NULL;
1671 int r;
1672
1673 assert(machine);
1674
1675 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1676 if (r < 0)
1677 return r;
1678
1679 return cg_path_get_machine_name(cgroup, machine);
1680 }
1681
1682 int cg_path_get_session(const char *path, char **session) {
1683 _cleanup_free_ char *unit = NULL;
1684 char *start, *end;
1685 int r;
1686
1687 assert(path);
1688
1689 r = cg_path_get_unit(path, &unit);
1690 if (r < 0)
1691 return r;
1692
1693 start = startswith(unit, "session-");
1694 if (!start)
1695 return -ENXIO;
1696 end = endswith(start, ".scope");
1697 if (!end)
1698 return -ENXIO;
1699
1700 *end = 0;
1701 if (!session_id_valid(start))
1702 return -ENXIO;
1703
1704 if (session) {
1705 char *rr;
1706
1707 rr = strdup(start);
1708 if (!rr)
1709 return -ENOMEM;
1710
1711 *session = rr;
1712 }
1713
1714 return 0;
1715 }
1716
1717 int cg_pid_get_session(pid_t pid, char **session) {
1718 _cleanup_free_ char *cgroup = NULL;
1719 int r;
1720
1721 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1722 if (r < 0)
1723 return r;
1724
1725 return cg_path_get_session(cgroup, session);
1726 }
1727
1728 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1729 _cleanup_free_ char *slice = NULL;
1730 char *start, *end;
1731 int r;
1732
1733 assert(path);
1734
1735 r = cg_path_get_slice(path, &slice);
1736 if (r < 0)
1737 return r;
1738
1739 start = startswith(slice, "user-");
1740 if (!start)
1741 return -ENXIO;
1742 end = endswith(start, ".slice");
1743 if (!end)
1744 return -ENXIO;
1745
1746 *end = 0;
1747 if (parse_uid(start, uid) < 0)
1748 return -ENXIO;
1749
1750 return 0;
1751 }
1752
1753 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1754 _cleanup_free_ char *cgroup = NULL;
1755 int r;
1756
1757 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1758 if (r < 0)
1759 return r;
1760
1761 return cg_path_get_owner_uid(cgroup, uid);
1762 }
1763
1764 int cg_path_get_slice(const char *p, char **slice) {
1765 const char *e = NULL;
1766
1767 assert(p);
1768 assert(slice);
1769
1770 /* Finds the right-most slice unit from the beginning, but
1771 * stops before we come to the first non-slice unit. */
1772
1773 for (;;) {
1774 size_t n;
1775
1776 p += strspn(p, "/");
1777
1778 n = strcspn(p, "/");
1779 if (!valid_slice_name(p, n)) {
1780
1781 if (!e) {
1782 char *s;
1783
1784 s = strdup(SPECIAL_ROOT_SLICE);
1785 if (!s)
1786 return -ENOMEM;
1787
1788 *slice = s;
1789 return 0;
1790 }
1791
1792 return cg_path_decode_unit(e, slice);
1793 }
1794
1795 e = p;
1796 p += n;
1797 }
1798 }
1799
1800 int cg_pid_get_slice(pid_t pid, char **slice) {
1801 _cleanup_free_ char *cgroup = NULL;
1802 int r;
1803
1804 assert(slice);
1805
1806 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1807 if (r < 0)
1808 return r;
1809
1810 return cg_path_get_slice(cgroup, slice);
1811 }
1812
/* Determines the slice of a cgroup path within a user's own subtree. Returns -ENXIO if the path has no
 * user manager service or session scope prefix. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *user_part;

        assert(p);
        assert(slice);

        user_part = skip_user_prefix(p);
        if (!user_part)
                return -ENXIO;

        /* What follows the user prefix is laid out like the system hierarchy, hence the system
         * slice parser is good enough for the rest. */
        return cg_path_get_slice(user_part, slice);
}
1826
1827 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1828 _cleanup_free_ char *cgroup = NULL;
1829 int r;
1830
1831 assert(slice);
1832
1833 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1834 if (r < 0)
1835 return r;
1836
1837 return cg_path_get_user_slice(cgroup, slice);
1838 }
1839
1840 char *cg_escape(const char *p) {
1841 bool need_prefix = false;
1842
1843 /* This implements very minimal escaping for names to be used
1844 * as file names in the cgroup tree: any name which might
1845 * conflict with a kernel name or is prefixed with '_' is
1846 * prefixed with a '_'. That way, when reading cgroup names it
1847 * is sufficient to remove a single prefixing underscore if
1848 * there is one. */
1849
1850 /* The return value of this function (unlike cg_unescape())
1851 * needs free()! */
1852
1853 if (IN_SET(p[0], 0, '_', '.') ||
1854 streq(p, "notify_on_release") ||
1855 streq(p, "release_agent") ||
1856 streq(p, "tasks") ||
1857 startswith(p, "cgroup."))
1858 need_prefix = true;
1859 else {
1860 const char *dot;
1861
1862 dot = strrchr(p, '.');
1863 if (dot) {
1864 CGroupController c;
1865 size_t l = dot - p;
1866
1867 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1868 const char *n;
1869
1870 n = cgroup_controller_to_string(c);
1871
1872 if (l != strlen(n))
1873 continue;
1874
1875 if (memcmp(p, n, l) != 0)
1876 continue;
1877
1878 need_prefix = true;
1879 break;
1880 }
1881 }
1882 }
1883
1884 if (need_prefix)
1885 return strappend("_", p);
1886
1887 return strdup(p);
1888 }
1889
/* Undoes cg_escape(): skips a single leading underscore, if there is one.
 *
 * Unlike cg_escape(), the return value of this function points into 'p' and must not be free()d. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1901
1902 #define CONTROLLER_VALID \
1903 DIGITS LETTERS \
1904 "_"
1905
1906 bool cg_controller_is_valid(const char *p) {
1907 const char *t, *s;
1908
1909 if (!p)
1910 return false;
1911
1912 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1913 return true;
1914
1915 s = startswith(p, "name=");
1916 if (s)
1917 p = s;
1918
1919 if (IN_SET(*p, 0, '_'))
1920 return false;
1921
1922 for (t = p; *t; t++)
1923 if (!strchr(CONTROLLER_VALID, *t))
1924 return false;
1925
1926 if (t - p > FILENAME_MAX)
1927 return false;
1928
1929 return true;
1930 }
1931
1932 int cg_slice_to_path(const char *unit, char **ret) {
1933 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1934 const char *dash;
1935 int r;
1936
1937 assert(unit);
1938 assert(ret);
1939
1940 if (streq(unit, SPECIAL_ROOT_SLICE)) {
1941 char *x;
1942
1943 x = strdup("");
1944 if (!x)
1945 return -ENOMEM;
1946 *ret = x;
1947 return 0;
1948 }
1949
1950 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1951 return -EINVAL;
1952
1953 if (!endswith(unit, ".slice"))
1954 return -EINVAL;
1955
1956 r = unit_name_to_prefix(unit, &p);
1957 if (r < 0)
1958 return r;
1959
1960 dash = strchr(p, '-');
1961
1962 /* Don't allow initial dashes */
1963 if (dash == p)
1964 return -EINVAL;
1965
1966 while (dash) {
1967 _cleanup_free_ char *escaped = NULL;
1968 char n[dash - p + sizeof(".slice")];
1969
1970 #if HAS_FEATURE_MEMORY_SANITIZER
1971 /* msan doesn't instrument stpncpy, so it thinks
1972 * n is later used unitialized:
1973 * https://github.com/google/sanitizers/issues/926
1974 */
1975 zero(n);
1976 #endif
1977
1978 /* Don't allow trailing or double dashes */
1979 if (IN_SET(dash[1], 0, '-'))
1980 return -EINVAL;
1981
1982 strcpy(stpncpy(n, p, dash - p), ".slice");
1983 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1984 return -EINVAL;
1985
1986 escaped = cg_escape(n);
1987 if (!escaped)
1988 return -ENOMEM;
1989
1990 if (!strextend(&s, escaped, "/", NULL))
1991 return -ENOMEM;
1992
1993 dash = strchr(dash+1, '-');
1994 }
1995
1996 e = cg_escape(unit);
1997 if (!e)
1998 return -ENOMEM;
1999
2000 if (!strextend(&s, e, NULL))
2001 return -ENOMEM;
2002
2003 *ret = TAKE_PTR(s);
2004
2005 return 0;
2006 }
2007
2008 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2009 _cleanup_free_ char *p = NULL;
2010 int r;
2011
2012 r = cg_get_path(controller, path, attribute, &p);
2013 if (r < 0)
2014 return r;
2015
2016 return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
2017 }
2018
2019 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2020 _cleanup_free_ char *p = NULL;
2021 int r;
2022
2023 r = cg_get_path(controller, path, attribute, &p);
2024 if (r < 0)
2025 return r;
2026
2027 return read_one_line_file(p, ret);
2028 }
2029
2030 int cg_get_keyed_attribute(
2031 const char *controller,
2032 const char *path,
2033 const char *attribute,
2034 char **keys,
2035 char **ret_values) {
2036
2037 _cleanup_free_ char *filename = NULL, *contents = NULL;
2038 const char *p;
2039 size_t n, i, n_done = 0;
2040 char **v;
2041 int r;
2042
2043 /* Reads one or more fields of a cgroupsv2 keyed attribute file. The 'keys' parameter should be an strv with
2044 * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
2045 * entries as 'keys'. On success each entry will be set to the value of the matching key.
2046 *
2047 * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
2048
2049 r = cg_get_path(controller, path, attribute, &filename);
2050 if (r < 0)
2051 return r;
2052
2053 r = read_full_file(filename, &contents, NULL);
2054 if (r < 0)
2055 return r;
2056
2057 n = strv_length(keys);
2058 if (n == 0) /* No keys to retrieve? That's easy, we are done then */
2059 return 0;
2060
2061 /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
2062 v = newa0(char*, n);
2063
2064 for (p = contents; *p;) {
2065 const char *w = NULL;
2066
2067 for (i = 0; i < n; i++)
2068 if (!v[i]) {
2069 w = first_word(p, keys[i]);
2070 if (w)
2071 break;
2072 }
2073
2074 if (w) {
2075 size_t l;
2076
2077 l = strcspn(w, NEWLINE);
2078 v[i] = strndup(w, l);
2079 if (!v[i]) {
2080 r = -ENOMEM;
2081 goto fail;
2082 }
2083
2084 n_done++;
2085 if (n_done >= n)
2086 goto done;
2087
2088 p = w + l;
2089 } else
2090 p += strcspn(p, NEWLINE);
2091
2092 p += strspn(p, NEWLINE);
2093 }
2094
2095 r = -ENXIO;
2096
2097 fail:
2098 for (i = 0; i < n; i++)
2099 free(v[i]);
2100
2101 return r;
2102
2103 done:
2104 memcpy(ret_values, v, sizeof(char*) * n);
2105 return 0;
2106
2107 }
2108
2109 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2110 CGroupController c;
2111 CGroupMask done;
2112 bool created;
2113 int r;
2114
2115 /* This one will create a cgroup in our private tree, but also
2116 * duplicate it in the trees specified in mask, and remove it
2117 * in all others.
2118 *
2119 * Returns 0 if the group already existed in the systemd hierarchy,
2120 * 1 on success, negative otherwise.
2121 */
2122
2123 /* First create the cgroup in our own hierarchy. */
2124 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2125 if (r < 0)
2126 return r;
2127 created = r;
2128
2129 /* If we are in the unified hierarchy, we are done now */
2130 r = cg_all_unified();
2131 if (r < 0)
2132 return r;
2133 if (r > 0)
2134 return created;
2135
2136 supported &= CGROUP_MASK_V1;
2137 mask = CGROUP_MASK_EXTEND_JOINED(mask);
2138 done = 0;
2139
2140 /* Otherwise, do the same in the other hierarchies */
2141 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2142 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2143 const char *n;
2144
2145 if (!FLAGS_SET(supported, bit))
2146 continue;
2147
2148 if (FLAGS_SET(done, bit))
2149 continue;
2150
2151 n = cgroup_controller_to_string(c);
2152 if (FLAGS_SET(mask, bit))
2153 (void) cg_create(n, path);
2154 else
2155 (void) cg_trim(n, path, true);
2156
2157 done |= CGROUP_MASK_EXTEND_JOINED(bit);
2158 }
2159
2160 return created;
2161 }
2162
2163 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2164 CGroupController c;
2165 CGroupMask done;
2166 int r;
2167
2168 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
2169 if (r < 0)
2170 return r;
2171
2172 r = cg_all_unified();
2173 if (r < 0)
2174 return r;
2175 if (r > 0)
2176 return 0;
2177
2178 supported &= CGROUP_MASK_V1;
2179 done = 0;
2180
2181 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2182 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2183 const char *p = NULL;
2184
2185 if (!FLAGS_SET(supported, bit))
2186 continue;
2187
2188 if (FLAGS_SET(done, bit))
2189 continue;
2190
2191 if (path_callback)
2192 p = path_callback(bit, userdata);
2193 if (!p)
2194 p = path;
2195
2196 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2197 done |= CGROUP_MASK_EXTEND_JOINED(bit);
2198 }
2199
2200 return 0;
2201 }
2202
2203 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2204 Iterator i;
2205 void *pidp;
2206 int r = 0;
2207
2208 SET_FOREACH(pidp, pids, i) {
2209 pid_t pid = PTR_TO_PID(pidp);
2210 int q;
2211
2212 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2213 if (q < 0 && r >= 0)
2214 r = q;
2215 }
2216
2217 return r;
2218 }
2219
2220 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2221 CGroupController c;
2222 CGroupMask done;
2223 int r = 0, q;
2224
2225 if (!path_equal(from, to)) {
2226 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2227 if (r < 0)
2228 return r;
2229 }
2230
2231 q = cg_all_unified();
2232 if (q < 0)
2233 return q;
2234 if (q > 0)
2235 return r;
2236
2237 supported &= CGROUP_MASK_V1;
2238 done = 0;
2239
2240 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2241 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2242 const char *p = NULL;
2243
2244 if (!FLAGS_SET(supported, bit))
2245 continue;
2246
2247 if (FLAGS_SET(done, bit))
2248 continue;
2249
2250 if (to_callback)
2251 p = to_callback(bit, userdata);
2252 if (!p)
2253 p = to;
2254
2255 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2256 done |= CGROUP_MASK_EXTEND_JOINED(bit);
2257 }
2258
2259 return r;
2260 }
2261
2262 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2263 CGroupController c;
2264 CGroupMask done;
2265 int r, q;
2266
2267 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2268 if (r < 0)
2269 return r;
2270
2271 q = cg_all_unified();
2272 if (q < 0)
2273 return q;
2274 if (q > 0)
2275 return r;
2276
2277 supported &= CGROUP_MASK_V1;
2278 done = 0;
2279
2280 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2281 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2282
2283 if (!FLAGS_SET(supported, bit))
2284 continue;
2285
2286 if (FLAGS_SET(done, bit))
2287 continue;
2288
2289 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2290 done |= CGROUP_MASK_EXTEND_JOINED(bit);
2291 }
2292
2293 return r;
2294 }
2295
2296 int cg_mask_to_string(CGroupMask mask, char **ret) {
2297 _cleanup_free_ char *s = NULL;
2298 size_t n = 0, allocated = 0;
2299 bool space = false;
2300 CGroupController c;
2301
2302 assert(ret);
2303
2304 if (mask == 0) {
2305 *ret = NULL;
2306 return 0;
2307 }
2308
2309 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2310 const char *k;
2311 size_t l;
2312
2313 if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
2314 continue;
2315
2316 k = cgroup_controller_to_string(c);
2317 l = strlen(k);
2318
2319 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
2320 return -ENOMEM;
2321
2322 if (space)
2323 s[n] = ' ';
2324 memcpy(s + n + space, k, l);
2325 n += space + l;
2326
2327 space = true;
2328 }
2329
2330 assert(s);
2331
2332 s[n] = 0;
2333 *ret = TAKE_PTR(s);
2334
2335 return 0;
2336 }
2337
2338 int cg_mask_from_string(const char *value, CGroupMask *ret) {
2339 CGroupMask m = 0;
2340
2341 assert(ret);
2342 assert(value);
2343
2344 for (;;) {
2345 _cleanup_free_ char *n = NULL;
2346 CGroupController v;
2347 int r;
2348
2349 r = extract_first_word(&value, &n, NULL, 0);
2350 if (r < 0)
2351 return r;
2352 if (r == 0)
2353 break;
2354
2355 v = cgroup_controller_from_string(n);
2356 if (v < 0)
2357 continue;
2358
2359 m |= CGROUP_CONTROLLER_TO_MASK(v);
2360 }
2361
2362 *ret = m;
2363 return 0;
2364 }
2365
2366 int cg_mask_supported(CGroupMask *ret) {
2367 CGroupMask mask;
2368 int r;
2369
2370 /* Determines the mask of supported cgroup controllers. Only
2371 * includes controllers we can make sense of and that are
2372 * actually accessible. */
2373
2374 r = cg_all_unified();
2375 if (r < 0)
2376 return r;
2377 if (r > 0) {
2378 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2379
2380 /* In the unified hierarchy we can read the supported
2381 * and accessible controllers from a the top-level
2382 * cgroup attribute */
2383
2384 r = cg_get_root_path(&root);
2385 if (r < 0)
2386 return r;
2387
2388 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2389 if (r < 0)
2390 return r;
2391
2392 r = read_one_line_file(path, &controllers);
2393 if (r < 0)
2394 return r;
2395
2396 r = cg_mask_from_string(controllers, &mask);
2397 if (r < 0)
2398 return r;
2399
2400 /* Currently, we support the cpu, memory, io and pids controller in the unified hierarchy, mask
2401 * everything else off. */
2402 mask &= CGROUP_MASK_V2;
2403
2404 } else {
2405 CGroupController c;
2406
2407 /* In the legacy hierarchy, we check which hierarchies are mounted. */
2408
2409 mask = 0;
2410 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2411 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2412 const char *n;
2413
2414 if (!FLAGS_SET(CGROUP_MASK_V1, bit))
2415 continue;
2416
2417 n = cgroup_controller_to_string(c);
2418 if (controller_is_accessible(n) >= 0)
2419 mask |= bit;
2420 }
2421 }
2422
2423 *ret = mask;
2424 return 0;
2425 }
2426
2427 int cg_kernel_controllers(Set **ret) {
2428 _cleanup_set_free_free_ Set *controllers = NULL;
2429 _cleanup_fclose_ FILE *f = NULL;
2430 int r;
2431
2432 assert(ret);
2433
2434 /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
2435 * and controllers that aren't currently accessible (because not mounted). This does not include "name="
2436 * pseudo-controllers. */
2437
2438 controllers = set_new(&string_hash_ops);
2439 if (!controllers)
2440 return -ENOMEM;
2441
2442 f = fopen("/proc/cgroups", "re");
2443 if (!f) {
2444 if (errno == ENOENT) {
2445 *ret = NULL;
2446 return 0;
2447 }
2448
2449 return -errno;
2450 }
2451
2452 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
2453
2454 /* Ignore the header line */
2455 (void) read_line(f, (size_t) -1, NULL);
2456
2457 for (;;) {
2458 char *controller;
2459 int enabled = 0;
2460
2461 errno = 0;
2462 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2463
2464 if (feof(f))
2465 break;
2466
2467 if (ferror(f) && errno > 0)
2468 return -errno;
2469
2470 return -EBADMSG;
2471 }
2472
2473 if (!enabled) {
2474 free(controller);
2475 continue;
2476 }
2477
2478 if (!cg_controller_is_valid(controller)) {
2479 free(controller);
2480 return -EBADMSG;
2481 }
2482
2483 r = set_consume(controllers, controller);
2484 if (r < 0)
2485 return r;
2486 }
2487
2488 *ret = TAKE_PTR(controllers);
2489
2490 return 0;
2491 }
2492
2493 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2494
2495 /* The hybrid mode was initially implemented in v232 and simply mounted cgroup v2 on /sys/fs/cgroup/systemd. This
2496 * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
2497 * /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mountnbs v2 on /sys/fs/cgroup/unified and maintains
2498 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
2499 *
2500 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep cgroup v2
2501 * process management but disable the compat dual layout, we return %true on
2502 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
2503 */
2504 static thread_local bool unified_systemd_v232;
2505
2506 static int cg_unified_update(void) {
2507
2508 struct statfs fs;
2509
2510 /* Checks if we support the unified hierarchy. Returns an
2511 * error when the cgroup hierarchies aren't mounted yet or we
2512 * have any other trouble determining if the unified hierarchy
2513 * is supported. */
2514
2515 if (unified_cache >= CGROUP_UNIFIED_NONE)
2516 return 0;
2517
2518 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2519 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
2520
2521 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2522 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
2523 unified_cache = CGROUP_UNIFIED_ALL;
2524 } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2525 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
2526 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2527 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2528 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2529 unified_systemd_v232 = false;
2530 } else {
2531 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
2532 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
2533
2534 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2535 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2536 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2537 unified_systemd_v232 = true;
2538 } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2539 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2540 unified_cache = CGROUP_UNIFIED_NONE;
2541 } else {
2542 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
2543 (unsigned long long) fs.f_type);
2544 unified_cache = CGROUP_UNIFIED_NONE;
2545 }
2546 }
2547 } else {
2548 log_debug("Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2549 (unsigned long long) fs.f_type);
2550 return -ENOMEDIUM;
2551 }
2552
2553 return 0;
2554 }
2555
2556 int cg_unified_controller(const char *controller) {
2557 int r;
2558
2559 r = cg_unified_update();
2560 if (r < 0)
2561 return r;
2562
2563 if (unified_cache == CGROUP_UNIFIED_NONE)
2564 return false;
2565
2566 if (unified_cache >= CGROUP_UNIFIED_ALL)
2567 return true;
2568
2569 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
2570 }
2571
2572 int cg_all_unified(void) {
2573 int r;
2574
2575 r = cg_unified_update();
2576 if (r < 0)
2577 return r;
2578
2579 return unified_cache >= CGROUP_UNIFIED_ALL;
2580 }
2581
2582 int cg_hybrid_unified(void) {
2583 int r;
2584
2585 r = cg_unified_update();
2586 if (r < 0)
2587 return r;
2588
2589 return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2590 }
2591
2592 int cg_unified_flush(void) {
2593 unified_cache = CGROUP_UNIFIED_UNKNOWN;
2594
2595 return cg_unified_update();
2596 }
2597
2598 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2599 _cleanup_fclose_ FILE *f = NULL;
2600 _cleanup_free_ char *fs = NULL;
2601 CGroupController c;
2602 int r;
2603
2604 assert(p);
2605
2606 if (supported == 0)
2607 return 0;
2608
2609 r = cg_all_unified();
2610 if (r < 0)
2611 return r;
2612 if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */
2613 return 0;
2614
2615 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2616 if (r < 0)
2617 return r;
2618
2619 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2620 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2621 const char *n;
2622
2623 if (!FLAGS_SET(CGROUP_MASK_V2, bit))
2624 continue;
2625
2626 if (!FLAGS_SET(supported, bit))
2627 continue;
2628
2629 n = cgroup_controller_to_string(c);
2630 {
2631 char s[1 + strlen(n) + 1];
2632
2633 s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
2634 strcpy(s + 1, n);
2635
2636 if (!f) {
2637 f = fopen(fs, "we");
2638 if (!f) {
2639 log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
2640 break;
2641 }
2642 }
2643
2644 r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
2645 if (r < 0) {
2646 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2647 clearerr(f);
2648 }
2649 }
2650 }
2651
2652 return 0;
2653 }
2654
2655 bool cg_is_unified_wanted(void) {
2656 static thread_local int wanted = -1;
2657 int r;
2658 bool b;
2659 const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
2660
2661 /* If we have a cached value, return that. */
2662 if (wanted >= 0)
2663 return wanted;
2664
2665 /* If the hierarchy is already mounted, then follow whatever
2666 * was chosen for it. */
2667 if (cg_unified_flush() >= 0)
2668 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
2669
2670 /* Otherwise, let's see what the kernel command line has to say.
2671 * Since checking is expensive, cache a non-error result. */
2672 r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
2673
2674 return (wanted = r > 0 ? b : is_default);
2675 }
2676
2677 bool cg_is_legacy_wanted(void) {
2678 static thread_local int wanted = -1;
2679
2680 /* If we have a cached value, return that. */
2681 if (wanted >= 0)
2682 return wanted;
2683
2684 /* Check if we have cgroups2 already mounted. */
2685 if (cg_unified_flush() >= 0 &&
2686 unified_cache == CGROUP_UNIFIED_ALL)
2687 return (wanted = false);
2688
2689 /* Otherwise, assume that at least partial legacy is wanted,
2690 * since cgroups2 should already be mounted at this point. */
2691 return (wanted = true);
2692 }
2693
2694 bool cg_is_hybrid_wanted(void) {
2695 static thread_local int wanted = -1;
2696 int r;
2697 bool b;
2698 const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2699 /* We default to true if the default is "hybrid", obviously,
2700 * but also when the default is "unified", because if we get
2701 * called, it means that unified hierarchy was not mounted. */
2702
2703 /* If we have a cached value, return that. */
2704 if (wanted >= 0)
2705 return wanted;
2706
2707 /* If the hierarchy is already mounted, then follow whatever
2708 * was chosen for it. */
2709 if (cg_unified_flush() >= 0 &&
2710 unified_cache == CGROUP_UNIFIED_ALL)
2711 return (wanted = false);
2712
2713 /* Otherwise, let's see what the kernel command line has to say.
2714 * Since checking is expensive, cache a non-error result. */
2715 r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
2716
2717 /* The meaning of the kernel option is reversed wrt. to the return value
2718 * of this function, hence the negation. */
2719 return (wanted = r > 0 ? !b : is_default);
2720 }
2721
2722 int cg_weight_parse(const char *s, uint64_t *ret) {
2723 uint64_t u;
2724 int r;
2725
2726 if (isempty(s)) {
2727 *ret = CGROUP_WEIGHT_INVALID;
2728 return 0;
2729 }
2730
2731 r = safe_atou64(s, &u);
2732 if (r < 0)
2733 return r;
2734
2735 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2736 return -ERANGE;
2737
2738 *ret = u;
2739 return 0;
2740 }
2741
2742 const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2743 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2744 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
2745 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2746 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
2747 };
2748
2749 static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2750 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2751 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
2752 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2753 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
2754 };
2755
2756 DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2757
2758 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2759 uint64_t u;
2760 int r;
2761
2762 if (isempty(s)) {
2763 *ret = CGROUP_CPU_SHARES_INVALID;
2764 return 0;
2765 }
2766
2767 r = safe_atou64(s, &u);
2768 if (r < 0)
2769 return r;
2770
2771 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2772 return -ERANGE;
2773
2774 *ret = u;
2775 return 0;
2776 }
2777
2778 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2779 uint64_t u;
2780 int r;
2781
2782 if (isempty(s)) {
2783 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2784 return 0;
2785 }
2786
2787 r = safe_atou64(s, &u);
2788 if (r < 0)
2789 return r;
2790
2791 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2792 return -ERANGE;
2793
2794 *ret = u;
2795 return 0;
2796 }
2797
2798 bool is_cgroup_fs(const struct statfs *s) {
2799 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2800 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2801 }
2802
/* Tells whether the file descriptor 'fd' refers to something on a cgroup (v1 or v2) file system.
 * Returns false if the file system of 'fd' cannot be queried. */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        /* The return type is bool, hence an error has to be mapped to false: returning -errno here
         * would be converted to true, and an fd we know nothing about would be reported as living
         * on a cgroup file system. */
        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2811
2812 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2813 [CGROUP_CONTROLLER_CPU] = "cpu",
2814 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2815 [CGROUP_CONTROLLER_IO] = "io",
2816 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2817 [CGROUP_CONTROLLER_MEMORY] = "memory",
2818 [CGROUP_CONTROLLER_DEVICES] = "devices",
2819 [CGROUP_CONTROLLER_PIDS] = "pids",
2820 [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
2821 [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
2822 };
2823
2824 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);