]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/cgroup-util.c
Merge pull request #8399 from keszybz/systemctl-kexec
[thirdparty/systemd.git] / src / basic / cgroup-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <dirent.h>
22 #include <errno.h>
23 #include <ftw.h>
24 #include <limits.h>
25 #include <signal.h>
26 #include <stddef.h>
27 #include <stdio_ext.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/stat.h>
31 #include <sys/statfs.h>
32 #include <sys/types.h>
33 #include <sys/xattr.h>
34 #include <unistd.h>
35
36 #include "alloc-util.h"
37 #include "cgroup-util.h"
38 #include "def.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
41 #include "fd-util.h"
42 #include "fileio.h"
43 #include "format-util.h"
44 #include "fs-util.h"
45 #include "log.h"
46 #include "login-util.h"
47 #include "macro.h"
48 #include "missing.h"
49 #include "mkdir.h"
50 #include "parse-util.h"
51 #include "path-util.h"
52 #include "proc-cmdline.h"
53 #include "process-util.h"
54 #include "set.h"
55 #include "special.h"
56 #include "stat-util.h"
57 #include "stdio-util.h"
58 #include "string-table.h"
59 #include "string-util.h"
60 #include "strv.h"
61 #include "unit-name.h"
62 #include "user-util.h"
63
64 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
65 _cleanup_free_ char *fs = NULL;
66 FILE *f;
67 int r;
68
69 assert(_f);
70
71 r = cg_get_path(controller, path, "cgroup.procs", &fs);
72 if (r < 0)
73 return r;
74
75 f = fopen(fs, "re");
76 if (!f)
77 return -errno;
78
79 *_f = f;
80 return 0;
81 }
82
/* Reads the next PID from a stream previously opened on a "cgroup.procs" file.
 *
 * Returns 1 and stores the PID in *_pid if one was read, 0 on end of file, and a negative errno-style
 * value on failure (-EIO if no more specific errno is available, or if the value read is 0).
 *
 * Note that cgroup.procs might contain duplicates, callers have to cope with that. */
int cg_read_pid(FILE *f, pid_t *_pid) {
        unsigned long value;
        int n_parsed;

        assert(f);
        assert(_pid);

        /* Reset errno so that we can tell a real I/O error from a plain parse failure afterwards */
        errno = 0;
        n_parsed = fscanf(f, "%lu", &value);
        if (n_parsed != 1) {
                if (feof(f))
                        return 0;

                if (errno > 0)
                        return -errno;

                return -EIO;
        }

        /* PID 0 is never a valid entry */
        if (value == 0)
                return -EIO;

        *_pid = (pid_t) value;
        return 1;
}
107
108 int cg_read_event(
109 const char *controller,
110 const char *path,
111 const char *event,
112 char **val) {
113
114 _cleanup_free_ char *events = NULL, *content = NULL;
115 char *p, *line;
116 int r;
117
118 r = cg_get_path(controller, path, "cgroup.events", &events);
119 if (r < 0)
120 return r;
121
122 r = read_full_file(events, &content, NULL);
123 if (r < 0)
124 return r;
125
126 p = content;
127 while ((line = strsep(&p, "\n"))) {
128 char *key;
129
130 key = strsep(&line, " ");
131 if (!key || !line)
132 return -EINVAL;
133
134 if (strcmp(key, event))
135 continue;
136
137 *val = strdup(line);
138 return 0;
139 }
140
141 return -ENOENT;
142 }
143
/* Reports whether /proc/self/ns/cgroup is accessible. The result of the first check is cached in a
 * thread-local variable and returned as is on every later call from the same thread. */
bool cg_ns_supported(void) {
        static thread_local int cached = -1;

        if (cached < 0)
                cached = access("/proc/self/ns/cgroup", F_OK) == 0 ? 1 : 0;

        return cached;
}
157
158 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
159 _cleanup_free_ char *fs = NULL;
160 int r;
161 DIR *d;
162
163 assert(_d);
164
165 /* This is not recursive! */
166
167 r = cg_get_path(controller, path, NULL, &fs);
168 if (r < 0)
169 return r;
170
171 d = opendir(fs);
172 if (!d)
173 return -errno;
174
175 *_d = d;
176 return 0;
177 }
178
179 int cg_read_subgroup(DIR *d, char **fn) {
180 struct dirent *de;
181
182 assert(d);
183 assert(fn);
184
185 FOREACH_DIRENT_ALL(de, d, return -errno) {
186 char *b;
187
188 if (de->d_type != DT_DIR)
189 continue;
190
191 if (dot_or_dot_dot(de->d_name))
192 continue;
193
194 b = strdup(de->d_name);
195 if (!b)
196 return -ENOMEM;
197
198 *fn = b;
199 return 1;
200 }
201
202 return 0;
203 }
204
205 int cg_rmdir(const char *controller, const char *path) {
206 _cleanup_free_ char *p = NULL;
207 int r;
208
209 r = cg_get_path(controller, path, NULL, &p);
210 if (r < 0)
211 return r;
212
213 r = rmdir(p);
214 if (r < 0 && errno != ENOENT)
215 return -errno;
216
217 r = cg_hybrid_unified();
218 if (r < 0)
219 return r;
220 if (r == 0)
221 return 0;
222
223 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
224 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
225 if (r < 0)
226 log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
227 }
228
229 return 0;
230 }
231
232 int cg_kill(
233 const char *controller,
234 const char *path,
235 int sig,
236 CGroupFlags flags,
237 Set *s,
238 cg_kill_log_func_t log_kill,
239 void *userdata) {
240
241 _cleanup_set_free_ Set *allocated_set = NULL;
242 bool done = false;
243 int r, ret = 0;
244 pid_t my_pid;
245
246 assert(sig >= 0);
247
248 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
249 * SIGCONT on SIGKILL. */
250 if (IN_SET(sig, SIGCONT, SIGKILL))
251 flags &= ~CGROUP_SIGCONT;
252
253 /* This goes through the tasks list and kills them all. This
254 * is repeated until no further processes are added to the
255 * tasks list, to properly handle forking processes */
256
257 if (!s) {
258 s = allocated_set = set_new(NULL);
259 if (!s)
260 return -ENOMEM;
261 }
262
263 my_pid = getpid_cached();
264
265 do {
266 _cleanup_fclose_ FILE *f = NULL;
267 pid_t pid = 0;
268 done = true;
269
270 r = cg_enumerate_processes(controller, path, &f);
271 if (r < 0) {
272 if (ret >= 0 && r != -ENOENT)
273 return r;
274
275 return ret;
276 }
277
278 while ((r = cg_read_pid(f, &pid)) > 0) {
279
280 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
281 continue;
282
283 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
284 continue;
285
286 if (log_kill)
287 log_kill(pid, sig, userdata);
288
289 /* If we haven't killed this process yet, kill
290 * it */
291 if (kill(pid, sig) < 0) {
292 if (ret >= 0 && errno != ESRCH)
293 ret = -errno;
294 } else {
295 if (flags & CGROUP_SIGCONT)
296 (void) kill(pid, SIGCONT);
297
298 if (ret == 0)
299 ret = 1;
300 }
301
302 done = false;
303
304 r = set_put(s, PID_TO_PTR(pid));
305 if (r < 0) {
306 if (ret >= 0)
307 return r;
308
309 return ret;
310 }
311 }
312
313 if (r < 0) {
314 if (ret >= 0)
315 return r;
316
317 return ret;
318 }
319
320 /* To avoid racing against processes which fork
321 * quicker than we can kill them we repeat this until
322 * no new pids need to be killed. */
323
324 } while (!done);
325
326 return ret;
327 }
328
329 int cg_kill_recursive(
330 const char *controller,
331 const char *path,
332 int sig,
333 CGroupFlags flags,
334 Set *s,
335 cg_kill_log_func_t log_kill,
336 void *userdata) {
337
338 _cleanup_set_free_ Set *allocated_set = NULL;
339 _cleanup_closedir_ DIR *d = NULL;
340 int r, ret;
341 char *fn;
342
343 assert(path);
344 assert(sig >= 0);
345
346 if (!s) {
347 s = allocated_set = set_new(NULL);
348 if (!s)
349 return -ENOMEM;
350 }
351
352 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
353
354 r = cg_enumerate_subgroups(controller, path, &d);
355 if (r < 0) {
356 if (ret >= 0 && r != -ENOENT)
357 return r;
358
359 return ret;
360 }
361
362 while ((r = cg_read_subgroup(d, &fn)) > 0) {
363 _cleanup_free_ char *p = NULL;
364
365 p = strjoin(path, "/", fn);
366 free(fn);
367 if (!p)
368 return -ENOMEM;
369
370 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
371 if (r != 0 && ret >= 0)
372 ret = r;
373 }
374 if (ret >= 0 && r < 0)
375 ret = r;
376
377 if (flags & CGROUP_REMOVE) {
378 r = cg_rmdir(controller, path);
379 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
380 return r;
381 }
382
383 return ret;
384 }
385
386 int cg_migrate(
387 const char *cfrom,
388 const char *pfrom,
389 const char *cto,
390 const char *pto,
391 CGroupFlags flags) {
392
393 bool done = false;
394 _cleanup_set_free_ Set *s = NULL;
395 int r, ret = 0;
396 pid_t my_pid;
397
398 assert(cfrom);
399 assert(pfrom);
400 assert(cto);
401 assert(pto);
402
403 s = set_new(NULL);
404 if (!s)
405 return -ENOMEM;
406
407 my_pid = getpid_cached();
408
409 do {
410 _cleanup_fclose_ FILE *f = NULL;
411 pid_t pid = 0;
412 done = true;
413
414 r = cg_enumerate_processes(cfrom, pfrom, &f);
415 if (r < 0) {
416 if (ret >= 0 && r != -ENOENT)
417 return r;
418
419 return ret;
420 }
421
422 while ((r = cg_read_pid(f, &pid)) > 0) {
423
424 /* This might do weird stuff if we aren't a
425 * single-threaded program. However, we
426 * luckily know we are not */
427 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
428 continue;
429
430 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
431 continue;
432
433 /* Ignore kernel threads. Since they can only
434 * exist in the root cgroup, we only check for
435 * them there. */
436 if (cfrom &&
437 (isempty(pfrom) || path_equal(pfrom, "/")) &&
438 is_kernel_thread(pid) > 0)
439 continue;
440
441 r = cg_attach(cto, pto, pid);
442 if (r < 0) {
443 if (ret >= 0 && r != -ESRCH)
444 ret = r;
445 } else if (ret == 0)
446 ret = 1;
447
448 done = false;
449
450 r = set_put(s, PID_TO_PTR(pid));
451 if (r < 0) {
452 if (ret >= 0)
453 return r;
454
455 return ret;
456 }
457 }
458
459 if (r < 0) {
460 if (ret >= 0)
461 return r;
462
463 return ret;
464 }
465 } while (!done);
466
467 return ret;
468 }
469
470 int cg_migrate_recursive(
471 const char *cfrom,
472 const char *pfrom,
473 const char *cto,
474 const char *pto,
475 CGroupFlags flags) {
476
477 _cleanup_closedir_ DIR *d = NULL;
478 int r, ret = 0;
479 char *fn;
480
481 assert(cfrom);
482 assert(pfrom);
483 assert(cto);
484 assert(pto);
485
486 ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
487
488 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
489 if (r < 0) {
490 if (ret >= 0 && r != -ENOENT)
491 return r;
492
493 return ret;
494 }
495
496 while ((r = cg_read_subgroup(d, &fn)) > 0) {
497 _cleanup_free_ char *p = NULL;
498
499 p = strjoin(pfrom, "/", fn);
500 free(fn);
501 if (!p)
502 return -ENOMEM;
503
504 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
505 if (r != 0 && ret >= 0)
506 ret = r;
507 }
508
509 if (r < 0 && ret >= 0)
510 ret = r;
511
512 if (flags & CGROUP_REMOVE) {
513 r = cg_rmdir(cfrom, pfrom);
514 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
515 return r;
516 }
517
518 return ret;
519 }
520
521 int cg_migrate_recursive_fallback(
522 const char *cfrom,
523 const char *pfrom,
524 const char *cto,
525 const char *pto,
526 CGroupFlags flags) {
527
528 int r;
529
530 assert(cfrom);
531 assert(pfrom);
532 assert(cto);
533 assert(pto);
534
535 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
536 if (r < 0) {
537 char prefix[strlen(pto) + 1];
538
539 /* This didn't work? Then let's try all prefixes of the destination */
540
541 PATH_FOREACH_PREFIX(prefix, pto) {
542 int q;
543
544 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
545 if (q >= 0)
546 return q;
547 }
548 }
549
550 return r;
551 }
552
553 static const char *controller_to_dirname(const char *controller) {
554 const char *e;
555
556 assert(controller);
557
558 /* Converts a controller name to the directory name below
559 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
560 * just cuts off the name= prefixed used for named
561 * hierarchies, if it is specified. */
562
563 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
564 if (cg_hybrid_unified() > 0)
565 controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
566 else
567 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
568 }
569
570 e = startswith(controller, "name=");
571 if (e)
572 return e;
573
574 return controller;
575 }
576
577 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
578 const char *dn;
579 char *t = NULL;
580
581 assert(fs);
582 assert(controller);
583
584 dn = controller_to_dirname(controller);
585
586 if (isempty(path) && isempty(suffix))
587 t = strappend("/sys/fs/cgroup/", dn);
588 else if (isempty(path))
589 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
590 else if (isempty(suffix))
591 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
592 else
593 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
594 if (!t)
595 return -ENOMEM;
596
597 *fs = t;
598 return 0;
599 }
600
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]". Empty or NULL 'path' and 'suffix' components are left
 * out. No slash normalization is done here.
 *
 * Returns 0 and stores the newly allocated string in *fs, or -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        char *joined;

        assert(fs);

        if (!isempty(path) && !isempty(suffix))
                joined = strjoin("/sys/fs/cgroup/", path, "/", suffix);
        else if (!isempty(path))
                joined = strappend("/sys/fs/cgroup/", path);
        else if (!isempty(suffix))
                joined = strappend("/sys/fs/cgroup/", suffix);
        else
                joined = strdup("/sys/fs/cgroup");

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
620
/* Generates the file system path for the given controller, cgroup path and attribute suffix.
 *
 * If 'controller' is NULL only 'path' and 'suffix' are joined, i.e. the result is the path *below*
 * the controller mount point, without any prefix; at least one of the two must be non-NULL then.
 * Otherwise the controller name is validated and the path is built for the unified or the legacy
 * layout, depending on what cg_all_unified() reports.
 *
 * In all cases the result is passed through path_kill_slashes().
 *
 * Returns 0 and stores the newly allocated path in *fs on success, -EINVAL on invalid arguments,
 * -ENOMEM on allocation failure, or the negative error returned by cg_all_unified(). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int unified, r;

        assert(fs);

        if (!controller) {
                char *joined;

                if (path && suffix)
                        joined = strjoin(path, "/", suffix);
                else if (path)
                        joined = strdup(path);
                else if (suffix)
                        joined = strdup(suffix);
                else
                        return -EINVAL;

                if (!joined)
                        return -ENOMEM;

                *fs = path_kill_slashes(joined);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        unified = cg_all_unified();
        if (unified < 0)
                return unified;

        r = unified > 0 ?
                join_path_unified(path, suffix, fs) :
                join_path_legacy(controller, path, suffix, fs);
        if (r < 0)
                return r;

        path_kill_slashes(*fs);
        return 0;
}
664
665 static int controller_is_accessible(const char *controller) {
666 int r;
667
668 assert(controller);
669
670 /* Checks whether a specific controller is accessible,
671 * i.e. its hierarchy mounted. In the unified hierarchy all
672 * controllers are considered accessible, except for the named
673 * hierarchies */
674
675 if (!cg_controller_is_valid(controller))
676 return -EINVAL;
677
678 r = cg_all_unified();
679 if (r < 0)
680 return r;
681 if (r > 0) {
682 /* We don't support named hierarchies if we are using
683 * the unified hierarchy. */
684
685 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
686 return 0;
687
688 if (startswith(controller, "name="))
689 return -EOPNOTSUPP;
690
691 } else {
692 const char *cc, *dn;
693
694 dn = controller_to_dirname(controller);
695 cc = strjoina("/sys/fs/cgroup/", dn);
696
697 if (laccess(cc, F_OK) < 0)
698 return -errno;
699 }
700
701 return 0;
702 }
703
/* Same as cg_get_path(), but first verifies via controller_is_accessible() that the controller can
 * actually be used, and fails with that function's error if it cannot. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
717
718 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
719 assert(path);
720 assert(sb);
721 assert(ftwbuf);
722
723 if (typeflag != FTW_DP)
724 return 0;
725
726 if (ftwbuf->level < 1)
727 return 0;
728
729 (void) rmdir(path);
730 return 0;
731 }
732
733 int cg_trim(const char *controller, const char *path, bool delete_root) {
734 _cleanup_free_ char *fs = NULL;
735 int r = 0, q;
736
737 assert(path);
738
739 r = cg_get_path(controller, path, NULL, &fs);
740 if (r < 0)
741 return r;
742
743 errno = 0;
744 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
745 if (errno == ENOENT)
746 r = 0;
747 else if (errno > 0)
748 r = -errno;
749 else
750 r = -EIO;
751 }
752
753 if (delete_root) {
754 if (rmdir(fs) < 0 && errno != ENOENT)
755 return -errno;
756 }
757
758 q = cg_hybrid_unified();
759 if (q < 0)
760 return q;
761 if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
762 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
763 if (q < 0)
764 log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
765 }
766
767 return r;
768 }
769
770 int cg_create(const char *controller, const char *path) {
771 _cleanup_free_ char *fs = NULL;
772 int r;
773
774 r = cg_get_path_and_check(controller, path, NULL, &fs);
775 if (r < 0)
776 return r;
777
778 r = mkdir_parents(fs, 0755);
779 if (r < 0)
780 return r;
781
782 r = mkdir_errno_wrapper(fs, 0755);
783 if (r == -EEXIST)
784 return 0;
785 if (r < 0)
786 return r;
787
788 r = cg_hybrid_unified();
789 if (r < 0)
790 return r;
791
792 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
793 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
794 if (r < 0)
795 log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
796 }
797
798 return 1;
799 }
800
/* Creates the specified cgroup and attaches process 'pid' to it.
 *
 * Returns the result of cg_create() on success (1 if the cgroup was newly created, 0 if it already
 * existed), or a negative errno-style value if either step failed. Note that the cgroup is not
 * removed again if attaching fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);
        if (attached < 0)
                return attached;

        return created;
}
817
818 int cg_attach(const char *controller, const char *path, pid_t pid) {
819 _cleanup_free_ char *fs = NULL;
820 char c[DECIMAL_STR_MAX(pid_t) + 2];
821 int r;
822
823 assert(path);
824 assert(pid >= 0);
825
826 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
827 if (r < 0)
828 return r;
829
830 if (pid == 0)
831 pid = getpid_cached();
832
833 xsprintf(c, PID_FMT "\n", pid);
834
835 r = write_string_file(fs, c, 0);
836 if (r < 0)
837 return r;
838
839 r = cg_hybrid_unified();
840 if (r < 0)
841 return r;
842
843 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
844 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
845 if (r < 0)
846 log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
847 }
848
849 return 0;
850 }
851
852 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
853 int r;
854
855 assert(controller);
856 assert(path);
857 assert(pid >= 0);
858
859 r = cg_attach(controller, path, pid);
860 if (r < 0) {
861 char prefix[strlen(path) + 1];
862
863 /* This didn't work? Then let's try all prefixes of
864 * the destination */
865
866 PATH_FOREACH_PREFIX(prefix, path) {
867 int q;
868
869 q = cg_attach(controller, prefix, pid);
870 if (q >= 0)
871 return q;
872 }
873 }
874
875 return r;
876 }
877
878 int cg_set_access(
879 const char *controller,
880 const char *path,
881 uid_t uid,
882 gid_t gid) {
883
884 struct Attribute {
885 const char *name;
886 bool fatal;
887 };
888
889 /* cgroupsv1, aka legacy/non-unified */
890 static const struct Attribute legacy_attributes[] = {
891 { "cgroup.procs", true },
892 { "tasks", false },
893 { "cgroup.clone_children", false },
894 {},
895 };
896
897 /* cgroupsv2, aka unified */
898 static const struct Attribute unified_attributes[] = {
899 { "cgroup.procs", true },
900 { "cgroup.subtree_control", true },
901 { "cgroup.threads", false },
902 {},
903 };
904
905 static const struct Attribute* const attributes[] = {
906 [false] = legacy_attributes,
907 [true] = unified_attributes,
908 };
909
910 _cleanup_free_ char *fs = NULL;
911 const struct Attribute *i;
912 int r, unified;
913
914 assert(path);
915
916 if (uid == UID_INVALID && gid == GID_INVALID)
917 return 0;
918
919 unified = cg_unified_controller(controller);
920 if (unified < 0)
921 return unified;
922
923 /* Configure access to the cgroup itself */
924 r = cg_get_path(controller, path, NULL, &fs);
925 if (r < 0)
926 return r;
927
928 r = chmod_and_chown(fs, 0755, uid, gid);
929 if (r < 0)
930 return r;
931
932 /* Configure access to the cgroup's attributes */
933 for (i = attributes[unified]; i->name; i++) {
934 fs = mfree(fs);
935
936 r = cg_get_path(controller, path, i->name, &fs);
937 if (r < 0)
938 return r;
939
940 r = chmod_and_chown(fs, 0644, uid, gid);
941 if (r < 0) {
942 if (i->fatal)
943 return r;
944
945 log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
946 }
947 }
948
949 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
950 r = cg_hybrid_unified();
951 if (r < 0)
952 return r;
953 if (r > 0) {
954 /* Always propagate access mode from unified to legacy controller */
955 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
956 if (r < 0)
957 log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
958 }
959 }
960
961 return 0;
962 }
963
964 int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
965 _cleanup_free_ char *fs = NULL;
966 int r;
967
968 assert(path);
969 assert(name);
970 assert(value || size <= 0);
971
972 r = cg_get_path(controller, path, NULL, &fs);
973 if (r < 0)
974 return r;
975
976 if (setxattr(fs, name, value, size, flags) < 0)
977 return -errno;
978
979 return 0;
980 }
981
982 int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
983 _cleanup_free_ char *fs = NULL;
984 ssize_t n;
985 int r;
986
987 assert(path);
988 assert(name);
989
990 r = cg_get_path(controller, path, NULL, &fs);
991 if (r < 0)
992 return r;
993
994 n = getxattr(fs, name, value, size);
995 if (n < 0)
996 return -errno;
997
998 return (int) n;
999 }
1000
1001 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
1002 _cleanup_fclose_ FILE *f = NULL;
1003 char line[LINE_MAX];
1004 const char *fs, *controller_str;
1005 size_t cs = 0;
1006 int unified;
1007
1008 assert(path);
1009 assert(pid >= 0);
1010
1011 if (controller) {
1012 if (!cg_controller_is_valid(controller))
1013 return -EINVAL;
1014 } else
1015 controller = SYSTEMD_CGROUP_CONTROLLER;
1016
1017 unified = cg_unified_controller(controller);
1018 if (unified < 0)
1019 return unified;
1020 if (unified == 0) {
1021 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
1022 controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
1023 else
1024 controller_str = controller;
1025
1026 cs = strlen(controller_str);
1027 }
1028
1029 fs = procfs_file_alloca(pid, "cgroup");
1030 f = fopen(fs, "re");
1031 if (!f)
1032 return errno == ENOENT ? -ESRCH : -errno;
1033
1034 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
1035
1036 FOREACH_LINE(line, f, return -errno) {
1037 char *e, *p;
1038
1039 truncate_nl(line);
1040
1041 if (unified) {
1042 e = startswith(line, "0:");
1043 if (!e)
1044 continue;
1045
1046 e = strchr(e, ':');
1047 if (!e)
1048 continue;
1049 } else {
1050 char *l;
1051 size_t k;
1052 const char *word, *state;
1053 bool found = false;
1054
1055 l = strchr(line, ':');
1056 if (!l)
1057 continue;
1058
1059 l++;
1060 e = strchr(l, ':');
1061 if (!e)
1062 continue;
1063
1064 *e = 0;
1065 FOREACH_WORD_SEPARATOR(word, k, l, ",", state)
1066 if (k == cs && memcmp(word, controller_str, cs) == 0) {
1067 found = true;
1068 break;
1069 }
1070 if (!found)
1071 continue;
1072 }
1073
1074 p = strdup(e + 1);
1075 if (!p)
1076 return -ENOMEM;
1077
1078 /* Truncate suffix indicating the process is a zombie */
1079 e = endswith(p, " (deleted)");
1080 if (e)
1081 *e = 0;
1082
1083 *path = p;
1084 return 0;
1085 }
1086
1087 return -ENODATA;
1088 }
1089
1090 int cg_install_release_agent(const char *controller, const char *agent) {
1091 _cleanup_free_ char *fs = NULL, *contents = NULL;
1092 const char *sc;
1093 int r;
1094
1095 assert(agent);
1096
1097 r = cg_unified_controller(controller);
1098 if (r < 0)
1099 return r;
1100 if (r > 0) /* doesn't apply to unified hierarchy */
1101 return -EOPNOTSUPP;
1102
1103 r = cg_get_path(controller, NULL, "release_agent", &fs);
1104 if (r < 0)
1105 return r;
1106
1107 r = read_one_line_file(fs, &contents);
1108 if (r < 0)
1109 return r;
1110
1111 sc = strstrip(contents);
1112 if (isempty(sc)) {
1113 r = write_string_file(fs, agent, 0);
1114 if (r < 0)
1115 return r;
1116 } else if (!path_equal(sc, agent))
1117 return -EEXIST;
1118
1119 fs = mfree(fs);
1120 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1121 if (r < 0)
1122 return r;
1123
1124 contents = mfree(contents);
1125 r = read_one_line_file(fs, &contents);
1126 if (r < 0)
1127 return r;
1128
1129 sc = strstrip(contents);
1130 if (streq(sc, "0")) {
1131 r = write_string_file(fs, "1", 0);
1132 if (r < 0)
1133 return r;
1134
1135 return 1;
1136 }
1137
1138 if (!streq(sc, "1"))
1139 return -EIO;
1140
1141 return 0;
1142 }
1143
1144 int cg_uninstall_release_agent(const char *controller) {
1145 _cleanup_free_ char *fs = NULL;
1146 int r;
1147
1148 r = cg_unified_controller(controller);
1149 if (r < 0)
1150 return r;
1151 if (r > 0) /* Doesn't apply to unified hierarchy */
1152 return -EOPNOTSUPP;
1153
1154 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1155 if (r < 0)
1156 return r;
1157
1158 r = write_string_file(fs, "0", 0);
1159 if (r < 0)
1160 return r;
1161
1162 fs = mfree(fs);
1163
1164 r = cg_get_path(controller, NULL, "release_agent", &fs);
1165 if (r < 0)
1166 return r;
1167
1168 r = write_string_file(fs, "", 0);
1169 if (r < 0)
1170 return r;
1171
1172 return 0;
1173 }
1174
1175 int cg_is_empty(const char *controller, const char *path) {
1176 _cleanup_fclose_ FILE *f = NULL;
1177 pid_t pid;
1178 int r;
1179
1180 assert(path);
1181
1182 r = cg_enumerate_processes(controller, path, &f);
1183 if (r == -ENOENT)
1184 return 1;
1185 if (r < 0)
1186 return r;
1187
1188 r = cg_read_pid(f, &pid);
1189 if (r < 0)
1190 return r;
1191
1192 return r == 0;
1193 }
1194
1195 int cg_is_empty_recursive(const char *controller, const char *path) {
1196 int r;
1197
1198 assert(path);
1199
1200 /* The root cgroup is always populated */
1201 if (controller && (isempty(path) || path_equal(path, "/")))
1202 return false;
1203
1204 r = cg_unified_controller(controller);
1205 if (r < 0)
1206 return r;
1207 if (r > 0) {
1208 _cleanup_free_ char *t = NULL;
1209
1210 /* On the unified hierarchy we can check empty state
1211 * via the "populated" attribute of "cgroup.events". */
1212
1213 r = cg_read_event(controller, path, "populated", &t);
1214 if (r < 0)
1215 return r;
1216
1217 return streq(t, "0");
1218 } else {
1219 _cleanup_closedir_ DIR *d = NULL;
1220 char *fn;
1221
1222 r = cg_is_empty(controller, path);
1223 if (r <= 0)
1224 return r;
1225
1226 r = cg_enumerate_subgroups(controller, path, &d);
1227 if (r == -ENOENT)
1228 return 1;
1229 if (r < 0)
1230 return r;
1231
1232 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1233 _cleanup_free_ char *p = NULL;
1234
1235 p = strjoin(path, "/", fn);
1236 free(fn);
1237 if (!p)
1238 return -ENOMEM;
1239
1240 r = cg_is_empty_recursive(controller, p);
1241 if (r <= 0)
1242 return r;
1243 }
1244 if (r < 0)
1245 return r;
1246
1247 return true;
1248 }
1249 }
1250
/* Splits a cgroup specification into its controller and path parts. Three forms are understood:
 *
 *   "/some/path"            → controller NULL, path set
 *   "controller"            → controller set, path NULL
 *   "controller:/some/path" → both set (path NULL if nothing follows the colon)
 *
 * Paths must be normalized, and in the third form absolute; they are passed through
 * path_kill_slashes(). Either output parameter may be NULL if the caller is not interested in that
 * part. The returned strings are newly allocated.
 *
 * Returns 0 on success, -EINVAL if the specification is malformed, -ENOMEM on allocation failure.
 * On failure the output parameters are not modified. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *ctrl = NULL, *rest = NULL;
        const char *colon;
        int r;

        assert(spec);

        /* Form 1: just a path */
        if (spec[0] == '/') {
                if (!path_is_normalized(spec))
                        return -EINVAL;

                if (path) {
                        rest = strdup(spec);
                        if (!rest)
                                return -ENOMEM;

                        *path = path_kill_slashes(rest);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        colon = strchr(spec, ':');

        /* Form 2: just a controller */
        if (!colon) {
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        ctrl = strdup(spec);
                        if (!ctrl)
                                return -ENOMEM;

                        *controller = ctrl;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Form 3: controller and path, separated by a colon */
        ctrl = strndup(spec, colon - spec);
        if (!ctrl)
                return -ENOMEM;

        if (!cg_controller_is_valid(ctrl)) {
                r = -EINVAL;
                goto fail;
        }

        if (!isempty(colon + 1)) {
                rest = strdup(colon + 1);
                if (!rest) {
                        r = -ENOMEM;
                        goto fail;
                }

                if (!path_is_normalized(rest) || !path_is_absolute(rest)) {
                        r = -EINVAL;
                        goto fail;
                }

                path_kill_slashes(rest);
        }

        /* Hand over what the caller asked for, release the rest */
        if (controller)
                *controller = ctrl;
        else
                free(ctrl);

        if (path)
                *path = rest;
        else
                free(rest);

        return 0;

fail:
        free(ctrl);
        free(rest);
        return r;
}
1333
1334 int cg_mangle_path(const char *path, char **result) {
1335 _cleanup_free_ char *c = NULL, *p = NULL;
1336 char *t;
1337 int r;
1338
1339 assert(path);
1340 assert(result);
1341
1342 /* First, check if it already is a filesystem path */
1343 if (path_startswith(path, "/sys/fs/cgroup")) {
1344
1345 t = strdup(path);
1346 if (!t)
1347 return -ENOMEM;
1348
1349 *result = path_kill_slashes(t);
1350 return 0;
1351 }
1352
1353 /* Otherwise, treat it as cg spec */
1354 r = cg_split_spec(path, &c, &p);
1355 if (r < 0)
1356 return r;
1357
1358 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1359 }
1360
1361 int cg_get_root_path(char **path) {
1362 char *p, *e;
1363 int r;
1364
1365 assert(path);
1366
1367 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1368 if (r < 0)
1369 return r;
1370
1371 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1372 if (!e)
1373 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1374 if (!e)
1375 e = endswith(p, "/system"); /* even more legacy */
1376 if (e)
1377 *e = 0;
1378
1379 *path = p;
1380 return 0;
1381 }
1382
1383 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1384 _cleanup_free_ char *rt = NULL;
1385 char *p;
1386 int r;
1387
1388 assert(cgroup);
1389 assert(shifted);
1390
1391 if (!root) {
1392 /* If the root was specified let's use that, otherwise
1393 * let's determine it from PID 1 */
1394
1395 r = cg_get_root_path(&rt);
1396 if (r < 0)
1397 return r;
1398
1399 root = rt;
1400 }
1401
1402 p = path_startswith(cgroup, root);
1403 if (p && p > cgroup)
1404 *shifted = p - 1;
1405 else
1406 *shifted = cgroup;
1407
1408 return 0;
1409 }
1410
1411 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1412 _cleanup_free_ char *raw = NULL;
1413 const char *c;
1414 int r;
1415
1416 assert(pid >= 0);
1417 assert(cgroup);
1418
1419 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1420 if (r < 0)
1421 return r;
1422
1423 r = cg_shift_path(raw, root, &c);
1424 if (r < 0)
1425 return r;
1426
1427 if (c == raw) {
1428 *cgroup = raw;
1429 raw = NULL;
1430 } else {
1431 char *n;
1432
1433 n = strdup(c);
1434 if (!n)
1435 return -ENOMEM;
1436
1437 *cgroup = n;
1438 }
1439
1440 return 0;
1441 }
1442
1443 int cg_path_decode_unit(const char *cgroup, char **unit) {
1444 char *c, *s;
1445 size_t n;
1446
1447 assert(cgroup);
1448 assert(unit);
1449
1450 n = strcspn(cgroup, "/");
1451 if (n < 3)
1452 return -ENXIO;
1453
1454 c = strndupa(cgroup, n);
1455 c = cg_unescape(c);
1456
1457 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1458 return -ENXIO;
1459
1460 s = strdup(c);
1461 if (!s)
1462 return -ENOMEM;
1463
1464 *unit = s;
1465 return 0;
1466 }
1467
1468 static bool valid_slice_name(const char *p, size_t n) {
1469
1470 if (!p)
1471 return false;
1472
1473 if (n < STRLEN("x.slice"))
1474 return false;
1475
1476 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1477 char buf[n+1], *c;
1478
1479 memcpy(buf, p, n);
1480 buf[n] = 0;
1481
1482 c = cg_unescape(buf);
1483
1484 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1485 }
1486
1487 return false;
1488 }
1489
/* Skips over all leading slice components of a cgroup path. Returns a pointer, into 'p', to the first
 * path component that valid_slice_name() does not accept, with leading slashes already skipped. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t length;

                component = p + strspn(p, "/");
                length = strcspn(component, "/");

                if (!valid_slice_name(component, length))
                        return component;

                p = component + length;
        }
}
1507
/* Determines the unit a cgroup path belongs to: skips all leading slices and decodes the component that
 * follows. Returns -ENXIO if that component turns out to be a slice itself. */
int cg_path_get_unit(const char *path, char **ret) {
        char *name;
        int r;

        assert(path);
        assert(ret);

        r = cg_path_decode_unit(skip_slices(path), &name);
        if (r < 0)
                return r;

        /* All slices have been skipped already, hence another one is not acceptable here */
        if (!endswith(name, ".slice")) {
                *ret = name;
                return 0;
        }

        free(name);
        return -ENXIO;
}
1531
1532 int cg_pid_get_unit(pid_t pid, char **unit) {
1533 _cleanup_free_ char *cgroup = NULL;
1534 int r;
1535
1536 assert(unit);
1537
1538 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1539 if (r < 0)
1540 return r;
1541
1542 return cg_path_get_unit(cgroup, unit);
1543 }
1544
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to what follows the session scope component (with
 * any slashes after it skipped), or NULL if the first component of p is
 * not a session scope with a valid session ID.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Holds just the session ID, i.e. the part between prefix and suffix */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure with NULL rather than a boolean */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1581
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the user manager component, or
 * NULL if the first component of p is not a user manager service
 * whose instance parses as a UID.
 */
static const char *skip_user_manager(const char *p) {
        size_t len, uid_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < STRLEN("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;

        uid_len = len - 5 - 8;

        {
                char uid_str[uid_len + 1];

                memcpy(uid_str, p + 5, uid_len);
                uid_str[uid_len] = 0;

                /* User manager services never need unescaping, as
                 * they cannot clash with the kernel's own names,
                 * hence cg_unescape() is not called here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;
        }

        p += len;
        return p + strspn(p, "/");
}
1619
/* Skips any leading slices, and then either the user manager service or, failing that, the session scope.
 * Returns NULL if neither of the two is found. */
static const char *skip_user_prefix(const char *path) {
        const char *rest, *after_manager;

        assert(path);

        rest = skip_slices(path);

        /* Prefer the user manager, fall back to the user session */
        after_manager = skip_user_manager(rest);
        return after_manager ? after_manager : skip_session(rest);
}
1636
/* Determines the user unit of a cgroup path: strips the user prefix and then parses the remainder the same way
 * as for a system unit. Returns -ENXIO if there is no user prefix. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1652
1653 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1654 _cleanup_free_ char *cgroup = NULL;
1655 int r;
1656
1657 assert(unit);
1658
1659 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1660 if (r < 0)
1661 return r;
1662
1663 return cg_path_get_user_unit(cgroup, unit);
1664 }
1665
1666 int cg_path_get_machine_name(const char *path, char **machine) {
1667 _cleanup_free_ char *u = NULL;
1668 const char *sl;
1669 int r;
1670
1671 r = cg_path_get_unit(path, &u);
1672 if (r < 0)
1673 return r;
1674
1675 sl = strjoina("/run/systemd/machines/unit:", u);
1676 return readlink_malloc(sl, machine);
1677 }
1678
1679 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1680 _cleanup_free_ char *cgroup = NULL;
1681 int r;
1682
1683 assert(machine);
1684
1685 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1686 if (r < 0)
1687 return r;
1688
1689 return cg_path_get_machine_name(cgroup, machine);
1690 }
1691
1692 int cg_path_get_session(const char *path, char **session) {
1693 _cleanup_free_ char *unit = NULL;
1694 char *start, *end;
1695 int r;
1696
1697 assert(path);
1698
1699 r = cg_path_get_unit(path, &unit);
1700 if (r < 0)
1701 return r;
1702
1703 start = startswith(unit, "session-");
1704 if (!start)
1705 return -ENXIO;
1706 end = endswith(start, ".scope");
1707 if (!end)
1708 return -ENXIO;
1709
1710 *end = 0;
1711 if (!session_id_valid(start))
1712 return -ENXIO;
1713
1714 if (session) {
1715 char *rr;
1716
1717 rr = strdup(start);
1718 if (!rr)
1719 return -ENOMEM;
1720
1721 *session = rr;
1722 }
1723
1724 return 0;
1725 }
1726
1727 int cg_pid_get_session(pid_t pid, char **session) {
1728 _cleanup_free_ char *cgroup = NULL;
1729 int r;
1730
1731 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1732 if (r < 0)
1733 return r;
1734
1735 return cg_path_get_session(cgroup, session);
1736 }
1737
1738 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1739 _cleanup_free_ char *slice = NULL;
1740 char *start, *end;
1741 int r;
1742
1743 assert(path);
1744
1745 r = cg_path_get_slice(path, &slice);
1746 if (r < 0)
1747 return r;
1748
1749 start = startswith(slice, "user-");
1750 if (!start)
1751 return -ENXIO;
1752 end = endswith(start, ".slice");
1753 if (!end)
1754 return -ENXIO;
1755
1756 *end = 0;
1757 if (parse_uid(start, uid) < 0)
1758 return -ENXIO;
1759
1760 return 0;
1761 }
1762
1763 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1764 _cleanup_free_ char *cgroup = NULL;
1765 int r;
1766
1767 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1768 if (r < 0)
1769 return r;
1770
1771 return cg_path_get_owner_uid(cgroup, uid);
1772 }
1773
1774 int cg_path_get_slice(const char *p, char **slice) {
1775 const char *e = NULL;
1776
1777 assert(p);
1778 assert(slice);
1779
1780 /* Finds the right-most slice unit from the beginning, but
1781 * stops before we come to the first non-slice unit. */
1782
1783 for (;;) {
1784 size_t n;
1785
1786 p += strspn(p, "/");
1787
1788 n = strcspn(p, "/");
1789 if (!valid_slice_name(p, n)) {
1790
1791 if (!e) {
1792 char *s;
1793
1794 s = strdup(SPECIAL_ROOT_SLICE);
1795 if (!s)
1796 return -ENOMEM;
1797
1798 *slice = s;
1799 return 0;
1800 }
1801
1802 return cg_path_decode_unit(e, slice);
1803 }
1804
1805 e = p;
1806 p += n;
1807 }
1808 }
1809
1810 int cg_pid_get_slice(pid_t pid, char **slice) {
1811 _cleanup_free_ char *cgroup = NULL;
1812 int r;
1813
1814 assert(slice);
1815
1816 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1817 if (r < 0)
1818 return r;
1819
1820 return cg_path_get_slice(cgroup, slice);
1821 }
1822
/* Determines the user slice of a cgroup path: strips the user prefix and then parses the remainder the same
 * way as for a system slice. Returns -ENXIO if there is no user prefix. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1836
1837 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1838 _cleanup_free_ char *cgroup = NULL;
1839 int r;
1840
1841 assert(slice);
1842
1843 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1844 if (r < 0)
1845 return r;
1846
1847 return cg_path_get_user_slice(cgroup, slice);
1848 }
1849
1850 char *cg_escape(const char *p) {
1851 bool need_prefix = false;
1852
1853 /* This implements very minimal escaping for names to be used
1854 * as file names in the cgroup tree: any name which might
1855 * conflict with a kernel name or is prefixed with '_' is
1856 * prefixed with a '_'. That way, when reading cgroup names it
1857 * is sufficient to remove a single prefixing underscore if
1858 * there is one. */
1859
1860 /* The return value of this function (unlike cg_unescape())
1861 * needs free()! */
1862
1863 if (IN_SET(p[0], 0, '_', '.') ||
1864 streq(p, "notify_on_release") ||
1865 streq(p, "release_agent") ||
1866 streq(p, "tasks") ||
1867 startswith(p, "cgroup."))
1868 need_prefix = true;
1869 else {
1870 const char *dot;
1871
1872 dot = strrchr(p, '.');
1873 if (dot) {
1874 CGroupController c;
1875 size_t l = dot - p;
1876
1877 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1878 const char *n;
1879
1880 n = cgroup_controller_to_string(c);
1881
1882 if (l != strlen(n))
1883 continue;
1884
1885 if (memcmp(p, n, l) != 0)
1886 continue;
1887
1888 need_prefix = true;
1889 break;
1890 }
1891 }
1892 }
1893
1894 if (need_prefix)
1895 return strappend("_", p);
1896
1897 return strdup(p);
1898 }
1899
/* Undoes cg_escape() by skipping a single leading underscore, if there is one. Unlike for cg_escape() the
 * returned pointer points into the input string and must not be freed. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1911
1912 #define CONTROLLER_VALID \
1913 DIGITS LETTERS \
1914 "_"
1915
1916 bool cg_controller_is_valid(const char *p) {
1917 const char *t, *s;
1918
1919 if (!p)
1920 return false;
1921
1922 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1923 return true;
1924
1925 s = startswith(p, "name=");
1926 if (s)
1927 p = s;
1928
1929 if (IN_SET(*p, 0, '_'))
1930 return false;
1931
1932 for (t = p; *t; t++)
1933 if (!strchr(CONTROLLER_VALID, *t))
1934 return false;
1935
1936 if (t - p > FILENAME_MAX)
1937 return false;
1938
1939 return true;
1940 }
1941
1942 int cg_slice_to_path(const char *unit, char **ret) {
1943 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1944 const char *dash;
1945 int r;
1946
1947 assert(unit);
1948 assert(ret);
1949
1950 if (streq(unit, SPECIAL_ROOT_SLICE)) {
1951 char *x;
1952
1953 x = strdup("");
1954 if (!x)
1955 return -ENOMEM;
1956 *ret = x;
1957 return 0;
1958 }
1959
1960 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1961 return -EINVAL;
1962
1963 if (!endswith(unit, ".slice"))
1964 return -EINVAL;
1965
1966 r = unit_name_to_prefix(unit, &p);
1967 if (r < 0)
1968 return r;
1969
1970 dash = strchr(p, '-');
1971
1972 /* Don't allow initial dashes */
1973 if (dash == p)
1974 return -EINVAL;
1975
1976 while (dash) {
1977 _cleanup_free_ char *escaped = NULL;
1978 char n[dash - p + sizeof(".slice")];
1979
1980 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1981 /* msan doesn't instrument stpncpy, so it thinks
1982 * n is later used unitialized:
1983 * https://github.com/google/sanitizers/issues/926
1984 */
1985 zero(n);
1986 #endif
1987
1988 /* Don't allow trailing or double dashes */
1989 if (IN_SET(dash[1], 0, '-'))
1990 return -EINVAL;
1991
1992 strcpy(stpncpy(n, p, dash - p), ".slice");
1993 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1994 return -EINVAL;
1995
1996 escaped = cg_escape(n);
1997 if (!escaped)
1998 return -ENOMEM;
1999
2000 if (!strextend(&s, escaped, "/", NULL))
2001 return -ENOMEM;
2002
2003 dash = strchr(dash+1, '-');
2004 }
2005
2006 e = cg_escape(unit);
2007 if (!e)
2008 return -ENOMEM;
2009
2010 if (!strextend(&s, e, NULL))
2011 return -ENOMEM;
2012
2013 *ret = s;
2014 s = NULL;
2015
2016 return 0;
2017 }
2018
2019 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2020 _cleanup_free_ char *p = NULL;
2021 int r;
2022
2023 r = cg_get_path(controller, path, attribute, &p);
2024 if (r < 0)
2025 return r;
2026
2027 return write_string_file(p, value, 0);
2028 }
2029
2030 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2031 _cleanup_free_ char *p = NULL;
2032 int r;
2033
2034 r = cg_get_path(controller, path, attribute, &p);
2035 if (r < 0)
2036 return r;
2037
2038 return read_one_line_file(p, ret);
2039 }
2040
2041 int cg_get_keyed_attribute(
2042 const char *controller,
2043 const char *path,
2044 const char *attribute,
2045 char **keys,
2046 char **ret_values) {
2047
2048 _cleanup_free_ char *filename = NULL, *contents = NULL;
2049 const char *p;
2050 size_t n, i, n_done = 0;
2051 char **v;
2052 int r;
2053
2054 /* Reads one or more fields of a cgroupsv2 keyed attribute file. The 'keys' parameter should be an strv with
2055 * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
2056 * entries as 'keys'. On success each entry will be set to the value of the matching key.
2057 *
2058 * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
2059
2060 r = cg_get_path(controller, path, attribute, &filename);
2061 if (r < 0)
2062 return r;
2063
2064 r = read_full_file(filename, &contents, NULL);
2065 if (r < 0)
2066 return r;
2067
2068 n = strv_length(keys);
2069 if (n == 0) /* No keys to retrieve? That's easy, we are done then */
2070 return 0;
2071
2072 /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
2073 v = newa0(char*, n);
2074
2075 for (p = contents; *p;) {
2076 const char *w = NULL;
2077
2078 for (i = 0; i < n; i++)
2079 if (!v[i]) {
2080 w = first_word(p, keys[i]);
2081 if (w)
2082 break;
2083 }
2084
2085 if (w) {
2086 size_t l;
2087
2088 l = strcspn(w, NEWLINE);
2089 v[i] = strndup(w, l);
2090 if (!v[i]) {
2091 r = -ENOMEM;
2092 goto fail;
2093 }
2094
2095 n_done++;
2096 if (n_done >= n)
2097 goto done;
2098
2099 p = w + l;
2100 } else
2101 p += strcspn(p, NEWLINE);
2102
2103 p += strspn(p, NEWLINE);
2104 }
2105
2106 r = -ENXIO;
2107
2108 fail:
2109 for (i = 0; i < n; i++)
2110 free(v[i]);
2111
2112 return r;
2113
2114 done:
2115 memcpy(ret_values, v, sizeof(char*) * n);
2116 return 0;
2117
2118 }
2119
2120 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2121 CGroupController c;
2122 int r;
2123
2124 /* This one will create a cgroup in our private tree, but also
2125 * duplicate it in the trees specified in mask, and remove it
2126 * in all others */
2127
2128 /* First create the cgroup in our own hierarchy. */
2129 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2130 if (r < 0)
2131 return r;
2132
2133 /* If we are in the unified hierarchy, we are done now */
2134 r = cg_all_unified();
2135 if (r < 0)
2136 return r;
2137 if (r > 0)
2138 return 0;
2139
2140 /* Otherwise, do the same in the other hierarchies */
2141 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2142 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2143 const char *n;
2144
2145 n = cgroup_controller_to_string(c);
2146
2147 if (mask & bit)
2148 (void) cg_create(n, path);
2149 else if (supported & bit)
2150 (void) cg_trim(n, path, true);
2151 }
2152
2153 return 0;
2154 }
2155
2156 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2157 CGroupController c;
2158 int r;
2159
2160 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
2161 if (r < 0)
2162 return r;
2163
2164 r = cg_all_unified();
2165 if (r < 0)
2166 return r;
2167 if (r > 0)
2168 return 0;
2169
2170 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2171 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2172 const char *p = NULL;
2173
2174 if (!(supported & bit))
2175 continue;
2176
2177 if (path_callback)
2178 p = path_callback(bit, userdata);
2179
2180 if (!p)
2181 p = path;
2182
2183 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2184 }
2185
2186 return 0;
2187 }
2188
2189 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2190 Iterator i;
2191 void *pidp;
2192 int r = 0;
2193
2194 SET_FOREACH(pidp, pids, i) {
2195 pid_t pid = PTR_TO_PID(pidp);
2196 int q;
2197
2198 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2199 if (q < 0 && r >= 0)
2200 r = q;
2201 }
2202
2203 return r;
2204 }
2205
2206 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2207 CGroupController c;
2208 int r = 0, q;
2209
2210 if (!path_equal(from, to)) {
2211 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2212 if (r < 0)
2213 return r;
2214 }
2215
2216 q = cg_all_unified();
2217 if (q < 0)
2218 return q;
2219 if (q > 0)
2220 return r;
2221
2222 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2223 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2224 const char *p = NULL;
2225
2226 if (!(supported & bit))
2227 continue;
2228
2229 if (to_callback)
2230 p = to_callback(bit, userdata);
2231
2232 if (!p)
2233 p = to;
2234
2235 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2236 }
2237
2238 return 0;
2239 }
2240
2241 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2242 CGroupController c;
2243 int r, q;
2244
2245 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2246 if (r < 0)
2247 return r;
2248
2249 q = cg_all_unified();
2250 if (q < 0)
2251 return q;
2252 if (q > 0)
2253 return r;
2254
2255 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2256 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2257
2258 if (!(supported & bit))
2259 continue;
2260
2261 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2262 }
2263
2264 return 0;
2265 }
2266
2267 int cg_mask_to_string(CGroupMask mask, char **ret) {
2268 _cleanup_free_ char *s = NULL;
2269 size_t n = 0, allocated = 0;
2270 bool space = false;
2271 CGroupController c;
2272
2273 assert(ret);
2274
2275 if (mask == 0) {
2276 *ret = NULL;
2277 return 0;
2278 }
2279
2280 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2281 const char *k;
2282 size_t l;
2283
2284 if (!(mask & CGROUP_CONTROLLER_TO_MASK(c)))
2285 continue;
2286
2287 k = cgroup_controller_to_string(c);
2288 l = strlen(k);
2289
2290 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
2291 return -ENOMEM;
2292
2293 if (space)
2294 s[n] = ' ';
2295 memcpy(s + n + space, k, l);
2296 n += space + l;
2297
2298 space = true;
2299 }
2300
2301 assert(s);
2302
2303 s[n] = 0;
2304 *ret = s;
2305 s = NULL;
2306
2307 return 0;
2308 }
2309
2310 int cg_mask_from_string(const char *value, CGroupMask *mask) {
2311 assert(mask);
2312 assert(value);
2313
2314 for (;;) {
2315 _cleanup_free_ char *n = NULL;
2316 CGroupController v;
2317 int r;
2318
2319 r = extract_first_word(&value, &n, NULL, 0);
2320 if (r < 0)
2321 return r;
2322 if (r == 0)
2323 break;
2324
2325 v = cgroup_controller_from_string(n);
2326 if (v < 0)
2327 continue;
2328
2329 *mask |= CGROUP_CONTROLLER_TO_MASK(v);
2330 }
2331 return 0;
2332 }
2333
2334 int cg_mask_supported(CGroupMask *ret) {
2335 CGroupMask mask = 0;
2336 int r;
2337
2338 /* Determines the mask of supported cgroup controllers. Only
2339 * includes controllers we can make sense of and that are
2340 * actually accessible. */
2341
2342 r = cg_all_unified();
2343 if (r < 0)
2344 return r;
2345 if (r > 0) {
2346 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2347
2348 /* In the unified hierarchy we can read the supported
2349 * and accessible controllers from a the top-level
2350 * cgroup attribute */
2351
2352 r = cg_get_root_path(&root);
2353 if (r < 0)
2354 return r;
2355
2356 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2357 if (r < 0)
2358 return r;
2359
2360 r = read_one_line_file(path, &controllers);
2361 if (r < 0)
2362 return r;
2363
2364 r = cg_mask_from_string(controllers, &mask);
2365 if (r < 0)
2366 return r;
2367
2368 /* Currently, we support the cpu, memory, io and pids
2369 * controller in the unified hierarchy, mask
2370 * everything else off. */
2371 mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2372
2373 } else {
2374 CGroupController c;
2375
2376 /* In the legacy hierarchy, we check whether which
2377 * hierarchies are mounted. */
2378
2379 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2380 const char *n;
2381
2382 n = cgroup_controller_to_string(c);
2383 if (controller_is_accessible(n) >= 0)
2384 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2385 }
2386 }
2387
2388 *ret = mask;
2389 return 0;
2390 }
2391
2392 int cg_kernel_controllers(Set **ret) {
2393 _cleanup_set_free_free_ Set *controllers = NULL;
2394 _cleanup_fclose_ FILE *f = NULL;
2395 int r;
2396
2397 assert(ret);
2398
2399 /* Determines the full list of kernel-known controllers. Might
2400 * include controllers we don't actually support, arbitrary
2401 * named hierarchies and controllers that aren't currently
2402 * accessible (because not mounted). */
2403
2404 controllers = set_new(&string_hash_ops);
2405 if (!controllers)
2406 return -ENOMEM;
2407
2408 f = fopen("/proc/cgroups", "re");
2409 if (!f) {
2410 if (errno == ENOENT) {
2411 *ret = NULL;
2412 return 0;
2413 }
2414
2415 return -errno;
2416 }
2417
2418 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
2419
2420 /* Ignore the header line */
2421 (void) read_line(f, (size_t) -1, NULL);
2422
2423 for (;;) {
2424 char *controller;
2425 int enabled = 0;
2426
2427 errno = 0;
2428 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2429
2430 if (feof(f))
2431 break;
2432
2433 if (ferror(f) && errno > 0)
2434 return -errno;
2435
2436 return -EBADMSG;
2437 }
2438
2439 if (!enabled) {
2440 free(controller);
2441 continue;
2442 }
2443
2444 if (!cg_controller_is_valid(controller)) {
2445 free(controller);
2446 return -EBADMSG;
2447 }
2448
2449 r = set_consume(controllers, controller);
2450 if (r < 0)
2451 return r;
2452 }
2453
2454 *ret = controllers;
2455 controllers = NULL;
2456
2457 return 0;
2458 }
2459
2460 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2461
2462 /* The hybrid mode was initially implemented in v232 and simply mounted cgroup v2 on /sys/fs/cgroup/systemd. This
2463 * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
2464 * /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts v2 on /sys/fs/cgroup/unified and maintains
2465 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
2466 *
2467 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep cgroup v2
2468 * process management but disable the compat dual layout, we return %true on
2469 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
2470 */
2471 static thread_local bool unified_systemd_v232;
2472
2473 static int cg_unified_update(void) {
2474
2475 struct statfs fs;
2476
2477 /* Checks if we support the unified hierarchy. Returns an
2478 * error when the cgroup hierarchies aren't mounted yet or we
2479 * have any other trouble determining if the unified hierarchy
2480 * is supported. */
2481
2482 if (unified_cache >= CGROUP_UNIFIED_NONE)
2483 return 0;
2484
2485 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2486 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
2487
2488 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2489 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
2490 unified_cache = CGROUP_UNIFIED_ALL;
2491 } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2492 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
2493 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2494 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2495 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2496 unified_systemd_v232 = false;
2497 } else {
2498 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
2499 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
2500
2501 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2502 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2503 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2504 unified_systemd_v232 = true;
2505 } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2506 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2507 unified_cache = CGROUP_UNIFIED_NONE;
2508 } else {
2509 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
2510 (unsigned long long) fs.f_type);
2511 unified_cache = CGROUP_UNIFIED_NONE;
2512 }
2513 }
2514 } else {
2515 log_debug("Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2516 (unsigned long long) fs.f_type);
2517 return -ENOMEDIUM;
2518 }
2519
2520 return 0;
2521 }
2522
2523 int cg_unified_controller(const char *controller) {
2524 int r;
2525
2526 r = cg_unified_update();
2527 if (r < 0)
2528 return r;
2529
2530 if (unified_cache == CGROUP_UNIFIED_NONE)
2531 return false;
2532
2533 if (unified_cache >= CGROUP_UNIFIED_ALL)
2534 return true;
2535
2536 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
2537 }
2538
2539 int cg_all_unified(void) {
2540 int r;
2541
2542 r = cg_unified_update();
2543 if (r < 0)
2544 return r;
2545
2546 return unified_cache >= CGROUP_UNIFIED_ALL;
2547 }
2548
2549 int cg_hybrid_unified(void) {
2550 int r;
2551
2552 r = cg_unified_update();
2553 if (r < 0)
2554 return r;
2555
2556 return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2557 }
2558
2559 int cg_unified_flush(void) {
2560 unified_cache = CGROUP_UNIFIED_UNKNOWN;
2561
2562 return cg_unified_update();
2563 }
2564
2565 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2566 _cleanup_fclose_ FILE *f = NULL;
2567 _cleanup_free_ char *fs = NULL;
2568 CGroupController c;
2569 int r;
2570
2571 assert(p);
2572
2573 if (supported == 0)
2574 return 0;
2575
2576 r = cg_all_unified();
2577 if (r < 0)
2578 return r;
2579 if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */
2580 return 0;
2581
2582 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2583 if (r < 0)
2584 return r;
2585
2586 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2587 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2588 const char *n;
2589
2590 if (!(supported & bit))
2591 continue;
2592
2593 n = cgroup_controller_to_string(c);
2594 {
2595 char s[1 + strlen(n) + 1];
2596
2597 s[0] = mask & bit ? '+' : '-';
2598 strcpy(s + 1, n);
2599
2600 if (!f) {
2601 f = fopen(fs, "we");
2602 if (!f) {
2603 log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
2604 break;
2605 }
2606 }
2607
2608 r = write_string_stream(f, s, 0);
2609 if (r < 0)
2610 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2611 }
2612 }
2613
2614 return 0;
2615 }
2616
2617 bool cg_is_unified_wanted(void) {
2618 static thread_local int wanted = -1;
2619 int r;
2620 bool b;
2621 const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
2622
2623 /* If we have a cached value, return that. */
2624 if (wanted >= 0)
2625 return wanted;
2626
2627 /* If the hierarchy is already mounted, then follow whatever
2628 * was chosen for it. */
2629 if (cg_unified_flush() >= 0)
2630 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
2631
2632 /* Otherwise, let's see what the kernel command line has to say.
2633 * Since checking is expensive, cache a non-error result. */
2634 r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
2635
2636 return (wanted = r > 0 ? b : is_default);
2637 }
2638
2639 bool cg_is_legacy_wanted(void) {
2640 static thread_local int wanted = -1;
2641
2642 /* If we have a cached value, return that. */
2643 if (wanted >= 0)
2644 return wanted;
2645
2646 /* Check if we have cgroups2 already mounted. */
2647 if (cg_unified_flush() >= 0 &&
2648 unified_cache == CGROUP_UNIFIED_ALL)
2649 return (wanted = false);
2650
2651 /* Otherwise, assume that at least partial legacy is wanted,
2652 * since cgroups2 should already be mounted at this point. */
2653 return (wanted = true);
2654 }
2655
2656 bool cg_is_hybrid_wanted(void) {
2657 static thread_local int wanted = -1;
2658 int r;
2659 bool b;
2660 const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2661 /* We default to true if the default is "hybrid", obviously,
2662 * but also when the default is "unified", because if we get
2663 * called, it means that unified hierarchy was not mounted. */
2664
2665 /* If we have a cached value, return that. */
2666 if (wanted >= 0)
2667 return wanted;
2668
2669 /* If the hierarchy is already mounted, then follow whatever
2670 * was chosen for it. */
2671 if (cg_unified_flush() >= 0 &&
2672 unified_cache == CGROUP_UNIFIED_ALL)
2673 return (wanted = false);
2674
2675 /* Otherwise, let's see what the kernel command line has to say.
2676 * Since checking is expensive, cache a non-error result. */
2677 r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
2678
2679 /* The meaning of the kernel option is reversed wrt. to the return value
2680 * of this function, hence the negation. */
2681 return (wanted = r > 0 ? !b : is_default);
2682 }
2683
2684 int cg_weight_parse(const char *s, uint64_t *ret) {
2685 uint64_t u;
2686 int r;
2687
2688 if (isempty(s)) {
2689 *ret = CGROUP_WEIGHT_INVALID;
2690 return 0;
2691 }
2692
2693 r = safe_atou64(s, &u);
2694 if (r < 0)
2695 return r;
2696
2697 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2698 return -ERANGE;
2699
2700 *ret = u;
2701 return 0;
2702 }
2703
2704 const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2705 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2706 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
2707 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2708 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
2709 };
2710
2711 static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2712 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2713 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
2714 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2715 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
2716 };
2717
2718 DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2719
2720 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2721 uint64_t u;
2722 int r;
2723
2724 if (isempty(s)) {
2725 *ret = CGROUP_CPU_SHARES_INVALID;
2726 return 0;
2727 }
2728
2729 r = safe_atou64(s, &u);
2730 if (r < 0)
2731 return r;
2732
2733 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2734 return -ERANGE;
2735
2736 *ret = u;
2737 return 0;
2738 }
2739
2740 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2741 uint64_t u;
2742 int r;
2743
2744 if (isempty(s)) {
2745 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2746 return 0;
2747 }
2748
2749 r = safe_atou64(s, &u);
2750 if (r < 0)
2751 return r;
2752
2753 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2754 return -ERANGE;
2755
2756 *ret = u;
2757 return 0;
2758 }
2759
2760 bool is_cgroup_fs(const struct statfs *s) {
2761 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2762 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2763 }
2764
/* Returns true if the file descriptor refers to a cgroup or cgroup2 file system, false if it does not or if
 * the file system type cannot be determined. */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        /* The return type is bool, hence a negative errno value would be converted to "true" here. Report
         * failure to query the file system as "not a cgroup fs" instead. */
        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2773
2774 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2775 [CGROUP_CONTROLLER_CPU] = "cpu",
2776 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2777 [CGROUP_CONTROLLER_IO] = "io",
2778 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2779 [CGROUP_CONTROLLER_MEMORY] = "memory",
2780 [CGROUP_CONTROLLER_DEVICES] = "devices",
2781 [CGROUP_CONTROLLER_PIDS] = "pids",
2782 };
2783
2784 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);