]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/cgroup-util.c
Merge pull request #7042 from vcaputo/iteratedcache
[thirdparty/systemd.git] / src / basic / cgroup-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <dirent.h>
22 #include <errno.h>
23 #include <ftw.h>
24 #include <limits.h>
25 #include <signal.h>
26 #include <stddef.h>
27 #include <stdio_ext.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/stat.h>
31 #include <sys/statfs.h>
32 #include <sys/types.h>
33 #include <sys/xattr.h>
34 #include <unistd.h>
35
36 #include "alloc-util.h"
37 #include "cgroup-util.h"
38 #include "def.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
41 #include "fd-util.h"
42 #include "fileio.h"
43 #include "format-util.h"
44 #include "fs-util.h"
45 #include "log.h"
46 #include "login-util.h"
47 #include "macro.h"
48 #include "missing.h"
49 #include "mkdir.h"
50 #include "parse-util.h"
51 #include "path-util.h"
52 #include "proc-cmdline.h"
53 #include "process-util.h"
54 #include "set.h"
55 #include "special.h"
56 #include "stat-util.h"
57 #include "stdio-util.h"
58 #include "string-table.h"
59 #include "string-util.h"
60 #include "strv.h"
61 #include "unit-name.h"
62 #include "user-util.h"
63
64 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
65 _cleanup_free_ char *fs = NULL;
66 FILE *f;
67 int r;
68
69 assert(_f);
70
71 r = cg_get_path(controller, path, "cgroup.procs", &fs);
72 if (r < 0)
73 return r;
74
75 f = fopen(fs, "re");
76 if (!f)
77 return -errno;
78
79 *_f = f;
80 return 0;
81 }
82
/* Reads the next PID from a stream previously opened on a "cgroup.procs" style file.
 *
 * Returns 1 and stores the PID in *_pid when a value was read, 0 when the end of the stream was
 * reached, and a negative errno-style value on failure (-EIO if no errno is available or if the
 * value read is zero).
 *
 * Note that cgroup.procs might contain duplicates! See cgroups.txt for details. */
int cg_read_pid(FILE *f, pid_t *_pid) {
        unsigned long value;
        int matched;

        assert(f);
        assert(_pid);

        /* Reset errno so that a stale value is not misreported if fscanf() fails without setting it. */
        errno = 0;
        matched = fscanf(f, "%lu", &value);
        if (matched != 1) {
                if (feof(f))
                        return 0;

                if (errno > 0)
                        return -errno;

                return -EIO;
        }

        /* The value is unsigned, hence zero is the only non-positive value possible. */
        if (value == 0)
                return -EIO;

        *_pid = (pid_t) value;
        return 1;
}
107
108 int cg_read_event(
109 const char *controller,
110 const char *path,
111 const char *event,
112 char **val) {
113
114 _cleanup_free_ char *events = NULL, *content = NULL;
115 char *p, *line;
116 int r;
117
118 r = cg_get_path(controller, path, "cgroup.events", &events);
119 if (r < 0)
120 return r;
121
122 r = read_full_file(events, &content, NULL);
123 if (r < 0)
124 return r;
125
126 p = content;
127 while ((line = strsep(&p, "\n"))) {
128 char *key;
129
130 key = strsep(&line, " ");
131 if (!key || !line)
132 return -EINVAL;
133
134 if (strcmp(key, event))
135 continue;
136
137 *val = strdup(line);
138 return 0;
139 }
140
141 return -ENOENT;
142 }
143
/* Reports whether /proc/self/ns/cgroup is accessible. The result is determined once per thread
 * with access() and cached in a thread-local variable for subsequent calls. */
bool cg_ns_supported(void) {
        static thread_local int enabled = -1;

        if (enabled < 0)
                enabled = access("/proc/self/ns/cgroup", F_OK) == 0 ? 1 : 0;

        return enabled;
}
157
158 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
159 _cleanup_free_ char *fs = NULL;
160 int r;
161 DIR *d;
162
163 assert(_d);
164
165 /* This is not recursive! */
166
167 r = cg_get_path(controller, path, NULL, &fs);
168 if (r < 0)
169 return r;
170
171 d = opendir(fs);
172 if (!d)
173 return -errno;
174
175 *_d = d;
176 return 0;
177 }
178
179 int cg_read_subgroup(DIR *d, char **fn) {
180 struct dirent *de;
181
182 assert(d);
183 assert(fn);
184
185 FOREACH_DIRENT_ALL(de, d, return -errno) {
186 char *b;
187
188 if (de->d_type != DT_DIR)
189 continue;
190
191 if (dot_or_dot_dot(de->d_name))
192 continue;
193
194 b = strdup(de->d_name);
195 if (!b)
196 return -ENOMEM;
197
198 *fn = b;
199 return 1;
200 }
201
202 return 0;
203 }
204
205 int cg_rmdir(const char *controller, const char *path) {
206 _cleanup_free_ char *p = NULL;
207 int r;
208
209 r = cg_get_path(controller, path, NULL, &p);
210 if (r < 0)
211 return r;
212
213 r = rmdir(p);
214 if (r < 0 && errno != ENOENT)
215 return -errno;
216
217 r = cg_hybrid_unified();
218 if (r < 0)
219 return r;
220 if (r == 0)
221 return 0;
222
223 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
224 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
225 if (r < 0)
226 log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
227 }
228
229 return 0;
230 }
231
232 int cg_kill(
233 const char *controller,
234 const char *path,
235 int sig,
236 CGroupFlags flags,
237 Set *s,
238 cg_kill_log_func_t log_kill,
239 void *userdata) {
240
241 _cleanup_set_free_ Set *allocated_set = NULL;
242 bool done = false;
243 int r, ret = 0;
244 pid_t my_pid;
245
246 assert(sig >= 0);
247
248 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
249 * SIGCONT on SIGKILL. */
250 if (IN_SET(sig, SIGCONT, SIGKILL))
251 flags &= ~CGROUP_SIGCONT;
252
253 /* This goes through the tasks list and kills them all. This
254 * is repeated until no further processes are added to the
255 * tasks list, to properly handle forking processes */
256
257 if (!s) {
258 s = allocated_set = set_new(NULL);
259 if (!s)
260 return -ENOMEM;
261 }
262
263 my_pid = getpid_cached();
264
265 do {
266 _cleanup_fclose_ FILE *f = NULL;
267 pid_t pid = 0;
268 done = true;
269
270 r = cg_enumerate_processes(controller, path, &f);
271 if (r < 0) {
272 if (ret >= 0 && r != -ENOENT)
273 return r;
274
275 return ret;
276 }
277
278 while ((r = cg_read_pid(f, &pid)) > 0) {
279
280 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
281 continue;
282
283 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
284 continue;
285
286 if (log_kill)
287 log_kill(pid, sig, userdata);
288
289 /* If we haven't killed this process yet, kill
290 * it */
291 if (kill(pid, sig) < 0) {
292 if (ret >= 0 && errno != ESRCH)
293 ret = -errno;
294 } else {
295 if (flags & CGROUP_SIGCONT)
296 (void) kill(pid, SIGCONT);
297
298 if (ret == 0)
299 ret = 1;
300 }
301
302 done = false;
303
304 r = set_put(s, PID_TO_PTR(pid));
305 if (r < 0) {
306 if (ret >= 0)
307 return r;
308
309 return ret;
310 }
311 }
312
313 if (r < 0) {
314 if (ret >= 0)
315 return r;
316
317 return ret;
318 }
319
320 /* To avoid racing against processes which fork
321 * quicker than we can kill them we repeat this until
322 * no new pids need to be killed. */
323
324 } while (!done);
325
326 return ret;
327 }
328
329 int cg_kill_recursive(
330 const char *controller,
331 const char *path,
332 int sig,
333 CGroupFlags flags,
334 Set *s,
335 cg_kill_log_func_t log_kill,
336 void *userdata) {
337
338 _cleanup_set_free_ Set *allocated_set = NULL;
339 _cleanup_closedir_ DIR *d = NULL;
340 int r, ret;
341 char *fn;
342
343 assert(path);
344 assert(sig >= 0);
345
346 if (!s) {
347 s = allocated_set = set_new(NULL);
348 if (!s)
349 return -ENOMEM;
350 }
351
352 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
353
354 r = cg_enumerate_subgroups(controller, path, &d);
355 if (r < 0) {
356 if (ret >= 0 && r != -ENOENT)
357 return r;
358
359 return ret;
360 }
361
362 while ((r = cg_read_subgroup(d, &fn)) > 0) {
363 _cleanup_free_ char *p = NULL;
364
365 p = strjoin(path, "/", fn);
366 free(fn);
367 if (!p)
368 return -ENOMEM;
369
370 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
371 if (r != 0 && ret >= 0)
372 ret = r;
373 }
374 if (ret >= 0 && r < 0)
375 ret = r;
376
377 if (flags & CGROUP_REMOVE) {
378 r = cg_rmdir(controller, path);
379 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
380 return r;
381 }
382
383 return ret;
384 }
385
386 int cg_migrate(
387 const char *cfrom,
388 const char *pfrom,
389 const char *cto,
390 const char *pto,
391 CGroupFlags flags) {
392
393 bool done = false;
394 _cleanup_set_free_ Set *s = NULL;
395 int r, ret = 0;
396 pid_t my_pid;
397
398 assert(cfrom);
399 assert(pfrom);
400 assert(cto);
401 assert(pto);
402
403 s = set_new(NULL);
404 if (!s)
405 return -ENOMEM;
406
407 my_pid = getpid_cached();
408
409 do {
410 _cleanup_fclose_ FILE *f = NULL;
411 pid_t pid = 0;
412 done = true;
413
414 r = cg_enumerate_processes(cfrom, pfrom, &f);
415 if (r < 0) {
416 if (ret >= 0 && r != -ENOENT)
417 return r;
418
419 return ret;
420 }
421
422 while ((r = cg_read_pid(f, &pid)) > 0) {
423
424 /* This might do weird stuff if we aren't a
425 * single-threaded program. However, we
426 * luckily know we are not */
427 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
428 continue;
429
430 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
431 continue;
432
433 /* Ignore kernel threads. Since they can only
434 * exist in the root cgroup, we only check for
435 * them there. */
436 if (cfrom &&
437 (isempty(pfrom) || path_equal(pfrom, "/")) &&
438 is_kernel_thread(pid) > 0)
439 continue;
440
441 r = cg_attach(cto, pto, pid);
442 if (r < 0) {
443 if (ret >= 0 && r != -ESRCH)
444 ret = r;
445 } else if (ret == 0)
446 ret = 1;
447
448 done = false;
449
450 r = set_put(s, PID_TO_PTR(pid));
451 if (r < 0) {
452 if (ret >= 0)
453 return r;
454
455 return ret;
456 }
457 }
458
459 if (r < 0) {
460 if (ret >= 0)
461 return r;
462
463 return ret;
464 }
465 } while (!done);
466
467 return ret;
468 }
469
470 int cg_migrate_recursive(
471 const char *cfrom,
472 const char *pfrom,
473 const char *cto,
474 const char *pto,
475 CGroupFlags flags) {
476
477 _cleanup_closedir_ DIR *d = NULL;
478 int r, ret = 0;
479 char *fn;
480
481 assert(cfrom);
482 assert(pfrom);
483 assert(cto);
484 assert(pto);
485
486 ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
487
488 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
489 if (r < 0) {
490 if (ret >= 0 && r != -ENOENT)
491 return r;
492
493 return ret;
494 }
495
496 while ((r = cg_read_subgroup(d, &fn)) > 0) {
497 _cleanup_free_ char *p = NULL;
498
499 p = strjoin(pfrom, "/", fn);
500 free(fn);
501 if (!p)
502 return -ENOMEM;
503
504 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
505 if (r != 0 && ret >= 0)
506 ret = r;
507 }
508
509 if (r < 0 && ret >= 0)
510 ret = r;
511
512 if (flags & CGROUP_REMOVE) {
513 r = cg_rmdir(cfrom, pfrom);
514 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
515 return r;
516 }
517
518 return ret;
519 }
520
521 int cg_migrate_recursive_fallback(
522 const char *cfrom,
523 const char *pfrom,
524 const char *cto,
525 const char *pto,
526 CGroupFlags flags) {
527
528 int r;
529
530 assert(cfrom);
531 assert(pfrom);
532 assert(cto);
533 assert(pto);
534
535 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
536 if (r < 0) {
537 char prefix[strlen(pto) + 1];
538
539 /* This didn't work? Then let's try all prefixes of the destination */
540
541 PATH_FOREACH_PREFIX(prefix, pto) {
542 int q;
543
544 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
545 if (q >= 0)
546 return q;
547 }
548 }
549
550 return r;
551 }
552
553 static const char *controller_to_dirname(const char *controller) {
554 const char *e;
555
556 assert(controller);
557
558 /* Converts a controller name to the directory name below
559 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
560 * just cuts off the name= prefixed used for named
561 * hierarchies, if it is specified. */
562
563 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
564 if (cg_hybrid_unified() > 0)
565 controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
566 else
567 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
568 }
569
570 e = startswith(controller, "name=");
571 if (e)
572 return e;
573
574 return controller;
575 }
576
577 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
578 const char *dn;
579 char *t = NULL;
580
581 assert(fs);
582 assert(controller);
583
584 dn = controller_to_dirname(controller);
585
586 if (isempty(path) && isempty(suffix))
587 t = strappend("/sys/fs/cgroup/", dn);
588 else if (isempty(path))
589 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
590 else if (isempty(suffix))
591 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
592 else
593 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
594 if (!t)
595 return -ENOMEM;
596
597 *fs = t;
598 return 0;
599 }
600
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]". Empty or NULL path and suffix components are
 * left out.
 *
 * Stores the newly allocated string in *fs and returns 0, or returns -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        bool no_path, no_suffix;
        char *joined;

        assert(fs);

        no_path = isempty(path);
        no_suffix = isempty(suffix);

        if (no_path && no_suffix)
                joined = strdup("/sys/fs/cgroup");
        else if (no_path)
                joined = strappend("/sys/fs/cgroup/", suffix);
        else if (no_suffix)
                joined = strappend("/sys/fs/cgroup/", path);
        else
                joined = strjoin("/sys/fs/cgroup/", path, "/", suffix);

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
620
/* Computes the file system path for the given controller, cgroup path and attribute suffix.
 *
 * If no controller is specified, the path *below* the controllers is returned, without any
 * prefix; in that case at least one of path and suffix must be non-NULL. Otherwise the
 * controller is validated and the path is built by join_path_unified() or join_path_legacy(),
 * depending on cg_all_unified().
 *
 * The result is passed through path_kill_slashes() and stored in *fs as a newly allocated
 * string. Returns 0 on success, -EINVAL on invalid arguments, -ENOMEM on allocation failure, or
 * the error returned by cg_all_unified(). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int r;

        assert(fs);

        if (!controller) {
                char *joined;

                if (!path && !suffix)
                        return -EINVAL;

                if (path && suffix)
                        joined = strjoin(path, "/", suffix);
                else
                        joined = strdup(path ? path : suffix);
                if (!joined)
                        return -ENOMEM;

                *fs = path_kill_slashes(joined);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified();
        if (r < 0)
                return r;

        r = r > 0 ?
                join_path_unified(path, suffix, fs) :
                join_path_legacy(controller, path, suffix, fs);
        if (r < 0)
                return r;

        path_kill_slashes(*fs);
        return 0;
}
664
665 static int controller_is_accessible(const char *controller) {
666 int r;
667
668 assert(controller);
669
670 /* Checks whether a specific controller is accessible,
671 * i.e. its hierarchy mounted. In the unified hierarchy all
672 * controllers are considered accessible, except for the named
673 * hierarchies */
674
675 if (!cg_controller_is_valid(controller))
676 return -EINVAL;
677
678 r = cg_all_unified();
679 if (r < 0)
680 return r;
681 if (r > 0) {
682 /* We don't support named hierarchies if we are using
683 * the unified hierarchy. */
684
685 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
686 return 0;
687
688 if (startswith(controller, "name="))
689 return -EOPNOTSUPP;
690
691 } else {
692 const char *cc, *dn;
693
694 dn = controller_to_dirname(controller);
695 cc = strjoina("/sys/fs/cgroup/", dn);
696
697 if (laccess(cc, F_OK) < 0)
698 return -errno;
699 }
700
701 return 0;
702 }
703
/* Same as cg_get_path(), but first verifies with controller_is_accessible() that the specified
 * controller is actually accessible, and fails with that function's error if it is not. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
717
718 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
719 assert(path);
720 assert(sb);
721 assert(ftwbuf);
722
723 if (typeflag != FTW_DP)
724 return 0;
725
726 if (ftwbuf->level < 1)
727 return 0;
728
729 (void) rmdir(path);
730 return 0;
731 }
732
733 int cg_trim(const char *controller, const char *path, bool delete_root) {
734 _cleanup_free_ char *fs = NULL;
735 int r = 0, q;
736
737 assert(path);
738
739 r = cg_get_path(controller, path, NULL, &fs);
740 if (r < 0)
741 return r;
742
743 errno = 0;
744 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
745 if (errno == ENOENT)
746 r = 0;
747 else if (errno > 0)
748 r = -errno;
749 else
750 r = -EIO;
751 }
752
753 if (delete_root) {
754 if (rmdir(fs) < 0 && errno != ENOENT)
755 return -errno;
756 }
757
758 q = cg_hybrid_unified();
759 if (q < 0)
760 return q;
761 if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
762 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
763 if (q < 0)
764 log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
765 }
766
767 return r;
768 }
769
770 int cg_create(const char *controller, const char *path) {
771 _cleanup_free_ char *fs = NULL;
772 int r;
773
774 r = cg_get_path_and_check(controller, path, NULL, &fs);
775 if (r < 0)
776 return r;
777
778 r = mkdir_parents(fs, 0755);
779 if (r < 0)
780 return r;
781
782 r = mkdir_errno_wrapper(fs, 0755);
783 if (r == -EEXIST)
784 return 0;
785 if (r < 0)
786 return r;
787
788 r = cg_hybrid_unified();
789 if (r < 0)
790 return r;
791
792 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
793 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
794 if (r < 0)
795 log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
796 }
797
798 return 1;
799 }
800
/* Creates the cgroup controller/path with cg_create() and then attaches pid to it with
 * cg_attach().
 *
 * On success the return value of cg_create() is passed through (1 if the cgroup was newly
 * created, 0 if it already existed). On failure a negative errno-style value is returned. This
 * does not remove the cgroup again if attaching fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, r;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        r = cg_attach(controller, path, pid);
        if (r < 0)
                return r;

        return created;
}
817
818 int cg_attach(const char *controller, const char *path, pid_t pid) {
819 _cleanup_free_ char *fs = NULL;
820 char c[DECIMAL_STR_MAX(pid_t) + 2];
821 int r;
822
823 assert(path);
824 assert(pid >= 0);
825
826 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
827 if (r < 0)
828 return r;
829
830 if (pid == 0)
831 pid = getpid_cached();
832
833 xsprintf(c, PID_FMT "\n", pid);
834
835 r = write_string_file(fs, c, 0);
836 if (r < 0)
837 return r;
838
839 r = cg_hybrid_unified();
840 if (r < 0)
841 return r;
842
843 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
844 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
845 if (r < 0)
846 log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
847 }
848
849 return 0;
850 }
851
852 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
853 int r;
854
855 assert(controller);
856 assert(path);
857 assert(pid >= 0);
858
859 r = cg_attach(controller, path, pid);
860 if (r < 0) {
861 char prefix[strlen(path) + 1];
862
863 /* This didn't work? Then let's try all prefixes of
864 * the destination */
865
866 PATH_FOREACH_PREFIX(prefix, path) {
867 int q;
868
869 q = cg_attach(controller, prefix, pid);
870 if (q >= 0)
871 return q;
872 }
873 }
874
875 return r;
876 }
877
878 int cg_set_access(
879 const char *controller,
880 const char *path,
881 uid_t uid,
882 gid_t gid) {
883
884 struct Attribute {
885 const char *name;
886 bool fatal;
887 };
888
889 /* cgroupsv1, aka legacy/non-unified */
890 static const struct Attribute legacy_attributes[] = {
891 { "cgroup.procs", true },
892 { "tasks", false },
893 { "cgroup.clone_children", false },
894 {},
895 };
896
897 /* cgroupsv2, aka unified */
898 static const struct Attribute unified_attributes[] = {
899 { "cgroup.procs", true },
900 { "cgroup.subtree_control", true },
901 { "cgroup.threads", false },
902 {},
903 };
904
905 static const struct Attribute* const attributes[] = {
906 [false] = legacy_attributes,
907 [true] = unified_attributes,
908 };
909
910 _cleanup_free_ char *fs = NULL;
911 const struct Attribute *i;
912 int r, unified;
913
914 assert(path);
915
916 if (uid == UID_INVALID && gid == GID_INVALID)
917 return 0;
918
919 unified = cg_unified_controller(controller);
920 if (unified < 0)
921 return unified;
922
923 /* Configure access to the cgroup itself */
924 r = cg_get_path(controller, path, NULL, &fs);
925 if (r < 0)
926 return r;
927
928 r = chmod_and_chown(fs, 0755, uid, gid);
929 if (r < 0)
930 return r;
931
932 /* Configure access to the cgroup's attributes */
933 for (i = attributes[unified]; i->name; i++) {
934 fs = mfree(fs);
935
936 r = cg_get_path(controller, path, i->name, &fs);
937 if (r < 0)
938 return r;
939
940 r = chmod_and_chown(fs, 0644, uid, gid);
941 if (r < 0) {
942 if (i->fatal)
943 return r;
944
945 log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
946 }
947 }
948
949 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
950 r = cg_hybrid_unified();
951 if (r < 0)
952 return r;
953 if (r > 0) {
954 /* Always propagate access mode from unified to legacy controller */
955 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
956 if (r < 0)
957 log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
958 }
959 }
960
961 return 0;
962 }
963
964 int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
965 _cleanup_free_ char *fs = NULL;
966 int r;
967
968 assert(path);
969 assert(name);
970 assert(value || size <= 0);
971
972 r = cg_get_path(controller, path, NULL, &fs);
973 if (r < 0)
974 return r;
975
976 if (setxattr(fs, name, value, size, flags) < 0)
977 return -errno;
978
979 return 0;
980 }
981
982 int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
983 _cleanup_free_ char *fs = NULL;
984 ssize_t n;
985 int r;
986
987 assert(path);
988 assert(name);
989
990 r = cg_get_path(controller, path, NULL, &fs);
991 if (r < 0)
992 return r;
993
994 n = getxattr(fs, name, value, size);
995 if (n < 0)
996 return -errno;
997
998 return (int) n;
999 }
1000
1001 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
1002 _cleanup_fclose_ FILE *f = NULL;
1003 char line[LINE_MAX];
1004 const char *fs, *controller_str;
1005 size_t cs = 0;
1006 int unified;
1007
1008 assert(path);
1009 assert(pid >= 0);
1010
1011 if (controller) {
1012 if (!cg_controller_is_valid(controller))
1013 return -EINVAL;
1014 } else
1015 controller = SYSTEMD_CGROUP_CONTROLLER;
1016
1017 unified = cg_unified_controller(controller);
1018 if (unified < 0)
1019 return unified;
1020 if (unified == 0) {
1021 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
1022 controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
1023 else
1024 controller_str = controller;
1025
1026 cs = strlen(controller_str);
1027 }
1028
1029 fs = procfs_file_alloca(pid, "cgroup");
1030 f = fopen(fs, "re");
1031 if (!f)
1032 return errno == ENOENT ? -ESRCH : -errno;
1033
1034 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
1035
1036 FOREACH_LINE(line, f, return -errno) {
1037 char *e, *p;
1038
1039 truncate_nl(line);
1040
1041 if (unified) {
1042 e = startswith(line, "0:");
1043 if (!e)
1044 continue;
1045
1046 e = strchr(e, ':');
1047 if (!e)
1048 continue;
1049 } else {
1050 char *l;
1051 size_t k;
1052 const char *word, *state;
1053 bool found = false;
1054
1055 l = strchr(line, ':');
1056 if (!l)
1057 continue;
1058
1059 l++;
1060 e = strchr(l, ':');
1061 if (!e)
1062 continue;
1063
1064 *e = 0;
1065 FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
1066 if (k == cs && memcmp(word, controller_str, cs) == 0) {
1067 found = true;
1068 break;
1069 }
1070 }
1071
1072 if (!found)
1073 continue;
1074 }
1075
1076 p = strdup(e + 1);
1077 if (!p)
1078 return -ENOMEM;
1079
1080 /* Truncate suffix indicating the process is a zombie */
1081 e = endswith(p, " (deleted)");
1082 if (e)
1083 *e = 0;
1084
1085 *path = p;
1086 return 0;
1087 }
1088
1089 return -ENODATA;
1090 }
1091
1092 int cg_install_release_agent(const char *controller, const char *agent) {
1093 _cleanup_free_ char *fs = NULL, *contents = NULL;
1094 const char *sc;
1095 int r;
1096
1097 assert(agent);
1098
1099 r = cg_unified_controller(controller);
1100 if (r < 0)
1101 return r;
1102 if (r > 0) /* doesn't apply to unified hierarchy */
1103 return -EOPNOTSUPP;
1104
1105 r = cg_get_path(controller, NULL, "release_agent", &fs);
1106 if (r < 0)
1107 return r;
1108
1109 r = read_one_line_file(fs, &contents);
1110 if (r < 0)
1111 return r;
1112
1113 sc = strstrip(contents);
1114 if (isempty(sc)) {
1115 r = write_string_file(fs, agent, 0);
1116 if (r < 0)
1117 return r;
1118 } else if (!path_equal(sc, agent))
1119 return -EEXIST;
1120
1121 fs = mfree(fs);
1122 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1123 if (r < 0)
1124 return r;
1125
1126 contents = mfree(contents);
1127 r = read_one_line_file(fs, &contents);
1128 if (r < 0)
1129 return r;
1130
1131 sc = strstrip(contents);
1132 if (streq(sc, "0")) {
1133 r = write_string_file(fs, "1", 0);
1134 if (r < 0)
1135 return r;
1136
1137 return 1;
1138 }
1139
1140 if (!streq(sc, "1"))
1141 return -EIO;
1142
1143 return 0;
1144 }
1145
1146 int cg_uninstall_release_agent(const char *controller) {
1147 _cleanup_free_ char *fs = NULL;
1148 int r;
1149
1150 r = cg_unified_controller(controller);
1151 if (r < 0)
1152 return r;
1153 if (r > 0) /* Doesn't apply to unified hierarchy */
1154 return -EOPNOTSUPP;
1155
1156 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1157 if (r < 0)
1158 return r;
1159
1160 r = write_string_file(fs, "0", 0);
1161 if (r < 0)
1162 return r;
1163
1164 fs = mfree(fs);
1165
1166 r = cg_get_path(controller, NULL, "release_agent", &fs);
1167 if (r < 0)
1168 return r;
1169
1170 r = write_string_file(fs, "", 0);
1171 if (r < 0)
1172 return r;
1173
1174 return 0;
1175 }
1176
1177 int cg_is_empty(const char *controller, const char *path) {
1178 _cleanup_fclose_ FILE *f = NULL;
1179 pid_t pid;
1180 int r;
1181
1182 assert(path);
1183
1184 r = cg_enumerate_processes(controller, path, &f);
1185 if (r == -ENOENT)
1186 return 1;
1187 if (r < 0)
1188 return r;
1189
1190 r = cg_read_pid(f, &pid);
1191 if (r < 0)
1192 return r;
1193
1194 return r == 0;
1195 }
1196
1197 int cg_is_empty_recursive(const char *controller, const char *path) {
1198 int r;
1199
1200 assert(path);
1201
1202 /* The root cgroup is always populated */
1203 if (controller && (isempty(path) || path_equal(path, "/")))
1204 return false;
1205
1206 r = cg_unified_controller(controller);
1207 if (r < 0)
1208 return r;
1209 if (r > 0) {
1210 _cleanup_free_ char *t = NULL;
1211
1212 /* On the unified hierarchy we can check empty state
1213 * via the "populated" attribute of "cgroup.events". */
1214
1215 r = cg_read_event(controller, path, "populated", &t);
1216 if (r < 0)
1217 return r;
1218
1219 return streq(t, "0");
1220 } else {
1221 _cleanup_closedir_ DIR *d = NULL;
1222 char *fn;
1223
1224 r = cg_is_empty(controller, path);
1225 if (r <= 0)
1226 return r;
1227
1228 r = cg_enumerate_subgroups(controller, path, &d);
1229 if (r == -ENOENT)
1230 return 1;
1231 if (r < 0)
1232 return r;
1233
1234 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1235 _cleanup_free_ char *p = NULL;
1236
1237 p = strjoin(path, "/", fn);
1238 free(fn);
1239 if (!p)
1240 return -ENOMEM;
1241
1242 r = cg_is_empty_recursive(controller, p);
1243 if (r <= 0)
1244 return r;
1245 }
1246 if (r < 0)
1247 return r;
1248
1249 return true;
1250 }
1251 }
1252
1253 int cg_split_spec(const char *spec, char **controller, char **path) {
1254 char *t = NULL, *u = NULL;
1255 const char *e;
1256
1257 assert(spec);
1258
1259 if (*spec == '/') {
1260 if (!path_is_normalized(spec))
1261 return -EINVAL;
1262
1263 if (path) {
1264 t = strdup(spec);
1265 if (!t)
1266 return -ENOMEM;
1267
1268 *path = path_kill_slashes(t);
1269 }
1270
1271 if (controller)
1272 *controller = NULL;
1273
1274 return 0;
1275 }
1276
1277 e = strchr(spec, ':');
1278 if (!e) {
1279 if (!cg_controller_is_valid(spec))
1280 return -EINVAL;
1281
1282 if (controller) {
1283 t = strdup(spec);
1284 if (!t)
1285 return -ENOMEM;
1286
1287 *controller = t;
1288 }
1289
1290 if (path)
1291 *path = NULL;
1292
1293 return 0;
1294 }
1295
1296 t = strndup(spec, e-spec);
1297 if (!t)
1298 return -ENOMEM;
1299 if (!cg_controller_is_valid(t)) {
1300 free(t);
1301 return -EINVAL;
1302 }
1303
1304 if (isempty(e+1))
1305 u = NULL;
1306 else {
1307 u = strdup(e+1);
1308 if (!u) {
1309 free(t);
1310 return -ENOMEM;
1311 }
1312
1313 if (!path_is_normalized(u) ||
1314 !path_is_absolute(u)) {
1315 free(t);
1316 free(u);
1317 return -EINVAL;
1318 }
1319
1320 path_kill_slashes(u);
1321 }
1322
1323 if (controller)
1324 *controller = t;
1325 else
1326 free(t);
1327
1328 if (path)
1329 *path = u;
1330 else
1331 free(u);
1332
1333 return 0;
1334 }
1335
1336 int cg_mangle_path(const char *path, char **result) {
1337 _cleanup_free_ char *c = NULL, *p = NULL;
1338 char *t;
1339 int r;
1340
1341 assert(path);
1342 assert(result);
1343
1344 /* First, check if it already is a filesystem path */
1345 if (path_startswith(path, "/sys/fs/cgroup")) {
1346
1347 t = strdup(path);
1348 if (!t)
1349 return -ENOMEM;
1350
1351 *result = path_kill_slashes(t);
1352 return 0;
1353 }
1354
1355 /* Otherwise, treat it as cg spec */
1356 r = cg_split_spec(path, &c, &p);
1357 if (r < 0)
1358 return r;
1359
1360 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1361 }
1362
1363 int cg_get_root_path(char **path) {
1364 char *p, *e;
1365 int r;
1366
1367 assert(path);
1368
1369 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1370 if (r < 0)
1371 return r;
1372
1373 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1374 if (!e)
1375 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1376 if (!e)
1377 e = endswith(p, "/system"); /* even more legacy */
1378 if (e)
1379 *e = 0;
1380
1381 *path = p;
1382 return 0;
1383 }
1384
1385 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1386 _cleanup_free_ char *rt = NULL;
1387 char *p;
1388 int r;
1389
1390 assert(cgroup);
1391 assert(shifted);
1392
1393 if (!root) {
1394 /* If the root was specified let's use that, otherwise
1395 * let's determine it from PID 1 */
1396
1397 r = cg_get_root_path(&rt);
1398 if (r < 0)
1399 return r;
1400
1401 root = rt;
1402 }
1403
1404 p = path_startswith(cgroup, root);
1405 if (p && p > cgroup)
1406 *shifted = p - 1;
1407 else
1408 *shifted = cgroup;
1409
1410 return 0;
1411 }
1412
1413 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1414 _cleanup_free_ char *raw = NULL;
1415 const char *c;
1416 int r;
1417
1418 assert(pid >= 0);
1419 assert(cgroup);
1420
1421 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1422 if (r < 0)
1423 return r;
1424
1425 r = cg_shift_path(raw, root, &c);
1426 if (r < 0)
1427 return r;
1428
1429 if (c == raw) {
1430 *cgroup = raw;
1431 raw = NULL;
1432 } else {
1433 char *n;
1434
1435 n = strdup(c);
1436 if (!n)
1437 return -ENOMEM;
1438
1439 *cgroup = n;
1440 }
1441
1442 return 0;
1443 }
1444
1445 int cg_path_decode_unit(const char *cgroup, char **unit) {
1446 char *c, *s;
1447 size_t n;
1448
1449 assert(cgroup);
1450 assert(unit);
1451
1452 n = strcspn(cgroup, "/");
1453 if (n < 3)
1454 return -ENXIO;
1455
1456 c = strndupa(cgroup, n);
1457 c = cg_unescape(c);
1458
1459 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1460 return -ENXIO;
1461
1462 s = strdup(c);
1463 if (!s)
1464 return -ENOMEM;
1465
1466 *unit = s;
1467 return 0;
1468 }
1469
1470 static bool valid_slice_name(const char *p, size_t n) {
1471
1472 if (!p)
1473 return false;
1474
1475 if (n < STRLEN("x.slice"))
1476 return false;
1477
1478 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1479 char buf[n+1], *c;
1480
1481 memcpy(buf, p, n);
1482 buf[n] = 0;
1483
1484 c = cg_unescape(buf);
1485
1486 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1487 }
1488
1489 return false;
1490 }
1491
/* Skips over all leading slice assignments in the cgroup path p, including the slashes around
 * them. Returns a pointer to the first path component that is not a valid slice name (which
 * might be the terminating NUL byte). */
static const char *skip_slices(const char *p) {
        size_t n;

        assert(p);

        p += strspn(p, "/");
        n = strcspn(p, "/");

        while (valid_slice_name(p, n)) {
                p += n;

                p += strspn(p, "/");
                n = strcspn(p, "/");
        }

        return p;
}
1509
/* Determines the unit a cgroup path belongs to: skips all leading slices and decodes the component that
 * follows. Stores a newly allocated unit name in *ret and returns 0, or returns a negative errno-style
 * value (-ENXIO if the decoded unit turns out to be a slice itself). */
int cg_path_get_unit(const char *path, char **ret) {
        char *decoded;
        int r;

        assert(path);
        assert(ret);

        r = cg_path_decode_unit(skip_slices(path), &decoded);
        if (r < 0)
                return r;

        /* Slices were skipped above already, a slice showing up now is not acceptable */
        if (!endswith(decoded, ".slice")) {
                *ret = decoded;
                return 0;
        }

        free(decoded);
        return -ENXIO;
}
1533
1534 int cg_pid_get_unit(pid_t pid, char **unit) {
1535 _cleanup_free_ char *cgroup = NULL;
1536 int r;
1537
1538 assert(unit);
1539
1540 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1541 if (r < 0)
1542 return r;
1543
1544 return cg_path_get_unit(cgroup, unit);
1545 }
1546
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to the path component following the session scope (with the separating slashes
 * skipped), or NULL if the first component of p is not a valid "session-<id>.scope" name.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Holds just the session ID, i.e. the component minus prefix and suffix */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure with NULL rather than a boolean */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1583
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to whatever follows the user manager component (separating slashes skipped), or NULL
 * if the first component is not "user@<uid>.service" with a parsable UID.
 */
static const char *skip_user_manager(const char *p) {
        size_t len, uid_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < STRLEN("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0)
                return NULL;
        if (memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;

        uid_len = len - 5 - 8;

        {
                char uid_str[uid_len + 1];

                memcpy(uid_str, p + 5, uid_len);
                uid_str[uid_len] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;
        }

        p += len;
        p += strspn(p, "/");

        return p;
}
1621
/* Skips the part of a cgroup path that leads up to the units of a user: any slices, followed by either
 * the user manager service or the session scope. Returns NULL if neither of the latter is present. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Slices are optional */
        after_slices = skip_slices(path);

        /* Prefer the user manager if it comes next, otherwise fall back to the session scope */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1638
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO if the path carries no user manager
 * or session prefix; otherwise the result of cg_path_get_unit() on the remainder of the path. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the user prefix the layout matches that of system units, so the same parser applies. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1654
1655 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1656 _cleanup_free_ char *cgroup = NULL;
1657 int r;
1658
1659 assert(unit);
1660
1661 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1662 if (r < 0)
1663 return r;
1664
1665 return cg_path_get_user_unit(cgroup, unit);
1666 }
1667
1668 int cg_path_get_machine_name(const char *path, char **machine) {
1669 _cleanup_free_ char *u = NULL;
1670 const char *sl;
1671 int r;
1672
1673 r = cg_path_get_unit(path, &u);
1674 if (r < 0)
1675 return r;
1676
1677 sl = strjoina("/run/systemd/machines/unit:", u);
1678 return readlink_malloc(sl, machine);
1679 }
1680
1681 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1682 _cleanup_free_ char *cgroup = NULL;
1683 int r;
1684
1685 assert(machine);
1686
1687 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1688 if (r < 0)
1689 return r;
1690
1691 return cg_path_get_machine_name(cgroup, machine);
1692 }
1693
1694 int cg_path_get_session(const char *path, char **session) {
1695 _cleanup_free_ char *unit = NULL;
1696 char *start, *end;
1697 int r;
1698
1699 assert(path);
1700
1701 r = cg_path_get_unit(path, &unit);
1702 if (r < 0)
1703 return r;
1704
1705 start = startswith(unit, "session-");
1706 if (!start)
1707 return -ENXIO;
1708 end = endswith(start, ".scope");
1709 if (!end)
1710 return -ENXIO;
1711
1712 *end = 0;
1713 if (!session_id_valid(start))
1714 return -ENXIO;
1715
1716 if (session) {
1717 char *rr;
1718
1719 rr = strdup(start);
1720 if (!rr)
1721 return -ENOMEM;
1722
1723 *session = rr;
1724 }
1725
1726 return 0;
1727 }
1728
1729 int cg_pid_get_session(pid_t pid, char **session) {
1730 _cleanup_free_ char *cgroup = NULL;
1731 int r;
1732
1733 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1734 if (r < 0)
1735 return r;
1736
1737 return cg_path_get_session(cgroup, session);
1738 }
1739
1740 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1741 _cleanup_free_ char *slice = NULL;
1742 char *start, *end;
1743 int r;
1744
1745 assert(path);
1746
1747 r = cg_path_get_slice(path, &slice);
1748 if (r < 0)
1749 return r;
1750
1751 start = startswith(slice, "user-");
1752 if (!start)
1753 return -ENXIO;
1754 end = endswith(start, ".slice");
1755 if (!end)
1756 return -ENXIO;
1757
1758 *end = 0;
1759 if (parse_uid(start, uid) < 0)
1760 return -ENXIO;
1761
1762 return 0;
1763 }
1764
1765 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1766 _cleanup_free_ char *cgroup = NULL;
1767 int r;
1768
1769 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1770 if (r < 0)
1771 return r;
1772
1773 return cg_path_get_owner_uid(cgroup, uid);
1774 }
1775
1776 int cg_path_get_slice(const char *p, char **slice) {
1777 const char *e = NULL;
1778
1779 assert(p);
1780 assert(slice);
1781
1782 /* Finds the right-most slice unit from the beginning, but
1783 * stops before we come to the first non-slice unit. */
1784
1785 for (;;) {
1786 size_t n;
1787
1788 p += strspn(p, "/");
1789
1790 n = strcspn(p, "/");
1791 if (!valid_slice_name(p, n)) {
1792
1793 if (!e) {
1794 char *s;
1795
1796 s = strdup(SPECIAL_ROOT_SLICE);
1797 if (!s)
1798 return -ENOMEM;
1799
1800 *slice = s;
1801 return 0;
1802 }
1803
1804 return cg_path_decode_unit(e, slice);
1805 }
1806
1807 e = p;
1808 p += n;
1809 }
1810 }
1811
1812 int cg_pid_get_slice(pid_t pid, char **slice) {
1813 _cleanup_free_ char *cgroup = NULL;
1814 int r;
1815
1816 assert(slice);
1817
1818 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1819 if (r < 0)
1820 return r;
1821
1822 return cg_path_get_slice(cgroup, slice);
1823 }
1824
/* Determines the slice below the user manager or session scope of a cgroup path. Returns -ENXIO if the
 * path carries no such user prefix; otherwise the result of cg_path_get_slice() on the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Past the user prefix the layout matches that of system slices, so the same parser applies. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1838
1839 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1840 _cleanup_free_ char *cgroup = NULL;
1841 int r;
1842
1843 assert(slice);
1844
1845 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1846 if (r < 0)
1847 return r;
1848
1849 return cg_path_get_user_slice(cgroup, slice);
1850 }
1851
1852 char *cg_escape(const char *p) {
1853 bool need_prefix = false;
1854
1855 /* This implements very minimal escaping for names to be used
1856 * as file names in the cgroup tree: any name which might
1857 * conflict with a kernel name or is prefixed with '_' is
1858 * prefixed with a '_'. That way, when reading cgroup names it
1859 * is sufficient to remove a single prefixing underscore if
1860 * there is one. */
1861
1862 /* The return value of this function (unlike cg_unescape())
1863 * needs free()! */
1864
1865 if (IN_SET(p[0], 0, '_', '.') ||
1866 streq(p, "notify_on_release") ||
1867 streq(p, "release_agent") ||
1868 streq(p, "tasks") ||
1869 startswith(p, "cgroup."))
1870 need_prefix = true;
1871 else {
1872 const char *dot;
1873
1874 dot = strrchr(p, '.');
1875 if (dot) {
1876 CGroupController c;
1877 size_t l = dot - p;
1878
1879 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1880 const char *n;
1881
1882 n = cgroup_controller_to_string(c);
1883
1884 if (l != strlen(n))
1885 continue;
1886
1887 if (memcmp(p, n, l) != 0)
1888 continue;
1889
1890 need_prefix = true;
1891 break;
1892 }
1893 }
1894 }
1895
1896 if (need_prefix)
1897 return strappend("_", p);
1898
1899 return strdup(p);
1900 }
1901
1902 char *cg_unescape(const char *p) {
1903 assert(p);
1904
1905 /* The return value of this function (unlike cg_escape())
1906 * doesn't need free()! */
1907
1908 if (p[0] == '_')
1909 return (char*) p+1;
1910
1911 return (char*) p;
1912 }
1913
1914 #define CONTROLLER_VALID \
1915 DIGITS LETTERS \
1916 "_"
1917
1918 bool cg_controller_is_valid(const char *p) {
1919 const char *t, *s;
1920
1921 if (!p)
1922 return false;
1923
1924 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1925 return true;
1926
1927 s = startswith(p, "name=");
1928 if (s)
1929 p = s;
1930
1931 if (IN_SET(*p, 0, '_'))
1932 return false;
1933
1934 for (t = p; *t; t++)
1935 if (!strchr(CONTROLLER_VALID, *t))
1936 return false;
1937
1938 if (t - p > FILENAME_MAX)
1939 return false;
1940
1941 return true;
1942 }
1943
1944 int cg_slice_to_path(const char *unit, char **ret) {
1945 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1946 const char *dash;
1947 int r;
1948
1949 assert(unit);
1950 assert(ret);
1951
1952 if (streq(unit, SPECIAL_ROOT_SLICE)) {
1953 char *x;
1954
1955 x = strdup("");
1956 if (!x)
1957 return -ENOMEM;
1958 *ret = x;
1959 return 0;
1960 }
1961
1962 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1963 return -EINVAL;
1964
1965 if (!endswith(unit, ".slice"))
1966 return -EINVAL;
1967
1968 r = unit_name_to_prefix(unit, &p);
1969 if (r < 0)
1970 return r;
1971
1972 dash = strchr(p, '-');
1973
1974 /* Don't allow initial dashes */
1975 if (dash == p)
1976 return -EINVAL;
1977
1978 while (dash) {
1979 _cleanup_free_ char *escaped = NULL;
1980 char n[dash - p + sizeof(".slice")];
1981
1982 /* Don't allow trailing or double dashes */
1983 if (IN_SET(dash[1], 0, '-'))
1984 return -EINVAL;
1985
1986 strcpy(stpncpy(n, p, dash - p), ".slice");
1987 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1988 return -EINVAL;
1989
1990 escaped = cg_escape(n);
1991 if (!escaped)
1992 return -ENOMEM;
1993
1994 if (!strextend(&s, escaped, "/", NULL))
1995 return -ENOMEM;
1996
1997 dash = strchr(dash+1, '-');
1998 }
1999
2000 e = cg_escape(unit);
2001 if (!e)
2002 return -ENOMEM;
2003
2004 if (!strextend(&s, e, NULL))
2005 return -ENOMEM;
2006
2007 *ret = s;
2008 s = NULL;
2009
2010 return 0;
2011 }
2012
2013 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2014 _cleanup_free_ char *p = NULL;
2015 int r;
2016
2017 r = cg_get_path(controller, path, attribute, &p);
2018 if (r < 0)
2019 return r;
2020
2021 return write_string_file(p, value, 0);
2022 }
2023
2024 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2025 _cleanup_free_ char *p = NULL;
2026 int r;
2027
2028 r = cg_get_path(controller, path, attribute, &p);
2029 if (r < 0)
2030 return r;
2031
2032 return read_one_line_file(p, ret);
2033 }
2034
2035 int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
2036 _cleanup_free_ char *filename = NULL, *content = NULL;
2037 char *line, *p;
2038 int i, r;
2039
2040 for (i = 0; keys[i]; i++)
2041 values[i] = NULL;
2042
2043 r = cg_get_path(controller, path, attribute, &filename);
2044 if (r < 0)
2045 return r;
2046
2047 r = read_full_file(filename, &content, NULL);
2048 if (r < 0)
2049 return r;
2050
2051 p = content;
2052 while ((line = strsep(&p, "\n"))) {
2053 char *key;
2054
2055 key = strsep(&line, " ");
2056
2057 for (i = 0; keys[i]; i++) {
2058 if (streq(key, keys[i])) {
2059 values[i] = strdup(line);
2060 break;
2061 }
2062 }
2063 }
2064
2065 for (i = 0; keys[i]; i++) {
2066 if (!values[i]) {
2067 for (i = 0; keys[i]; i++) {
2068 values[i] = mfree(values[i]);
2069 }
2070 return -ENOENT;
2071 }
2072 }
2073
2074 return 0;
2075 }
2076
2077 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2078 CGroupController c;
2079 int r;
2080
2081 /* This one will create a cgroup in our private tree, but also
2082 * duplicate it in the trees specified in mask, and remove it
2083 * in all others */
2084
2085 /* First create the cgroup in our own hierarchy. */
2086 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2087 if (r < 0)
2088 return r;
2089
2090 /* If we are in the unified hierarchy, we are done now */
2091 r = cg_all_unified();
2092 if (r < 0)
2093 return r;
2094 if (r > 0)
2095 return 0;
2096
2097 /* Otherwise, do the same in the other hierarchies */
2098 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2099 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2100 const char *n;
2101
2102 n = cgroup_controller_to_string(c);
2103
2104 if (mask & bit)
2105 (void) cg_create(n, path);
2106 else if (supported & bit)
2107 (void) cg_trim(n, path, true);
2108 }
2109
2110 return 0;
2111 }
2112
2113 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2114 CGroupController c;
2115 int r;
2116
2117 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
2118 if (r < 0)
2119 return r;
2120
2121 r = cg_all_unified();
2122 if (r < 0)
2123 return r;
2124 if (r > 0)
2125 return 0;
2126
2127 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2128 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2129 const char *p = NULL;
2130
2131 if (!(supported & bit))
2132 continue;
2133
2134 if (path_callback)
2135 p = path_callback(bit, userdata);
2136
2137 if (!p)
2138 p = path;
2139
2140 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2141 }
2142
2143 return 0;
2144 }
2145
2146 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2147 Iterator i;
2148 void *pidp;
2149 int r = 0;
2150
2151 SET_FOREACH(pidp, pids, i) {
2152 pid_t pid = PTR_TO_PID(pidp);
2153 int q;
2154
2155 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2156 if (q < 0 && r >= 0)
2157 r = q;
2158 }
2159
2160 return r;
2161 }
2162
2163 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2164 CGroupController c;
2165 int r = 0, q;
2166
2167 if (!path_equal(from, to)) {
2168 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2169 if (r < 0)
2170 return r;
2171 }
2172
2173 q = cg_all_unified();
2174 if (q < 0)
2175 return q;
2176 if (q > 0)
2177 return r;
2178
2179 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2180 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2181 const char *p = NULL;
2182
2183 if (!(supported & bit))
2184 continue;
2185
2186 if (to_callback)
2187 p = to_callback(bit, userdata);
2188
2189 if (!p)
2190 p = to;
2191
2192 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2193 }
2194
2195 return 0;
2196 }
2197
2198 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2199 CGroupController c;
2200 int r, q;
2201
2202 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2203 if (r < 0)
2204 return r;
2205
2206 q = cg_all_unified();
2207 if (q < 0)
2208 return q;
2209 if (q > 0)
2210 return r;
2211
2212 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2213 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2214
2215 if (!(supported & bit))
2216 continue;
2217
2218 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2219 }
2220
2221 return 0;
2222 }
2223
2224 int cg_mask_to_string(CGroupMask mask, char **ret) {
2225 _cleanup_free_ char *s = NULL;
2226 size_t n = 0, allocated = 0;
2227 bool space = false;
2228 CGroupController c;
2229
2230 assert(ret);
2231
2232 if (mask == 0) {
2233 *ret = NULL;
2234 return 0;
2235 }
2236
2237 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2238 const char *k;
2239 size_t l;
2240
2241 if (!(mask & CGROUP_CONTROLLER_TO_MASK(c)))
2242 continue;
2243
2244 k = cgroup_controller_to_string(c);
2245 l = strlen(k);
2246
2247 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
2248 return -ENOMEM;
2249
2250 if (space)
2251 s[n] = ' ';
2252 memcpy(s + n + space, k, l);
2253 n += space + l;
2254
2255 space = true;
2256 }
2257
2258 assert(s);
2259
2260 s[n] = 0;
2261 *ret = s;
2262 s = NULL;
2263
2264 return 0;
2265 }
2266
2267 int cg_mask_from_string(const char *value, CGroupMask *mask) {
2268 assert(mask);
2269 assert(value);
2270
2271 for (;;) {
2272 _cleanup_free_ char *n = NULL;
2273 CGroupController v;
2274 int r;
2275
2276 r = extract_first_word(&value, &n, NULL, 0);
2277 if (r < 0)
2278 return r;
2279 if (r == 0)
2280 break;
2281
2282 v = cgroup_controller_from_string(n);
2283 if (v < 0)
2284 continue;
2285
2286 *mask |= CGROUP_CONTROLLER_TO_MASK(v);
2287 }
2288 return 0;
2289 }
2290
2291 int cg_mask_supported(CGroupMask *ret) {
2292 CGroupMask mask = 0;
2293 int r;
2294
2295 /* Determines the mask of supported cgroup controllers. Only
2296 * includes controllers we can make sense of and that are
2297 * actually accessible. */
2298
2299 r = cg_all_unified();
2300 if (r < 0)
2301 return r;
2302 if (r > 0) {
2303 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2304
2305 /* In the unified hierarchy we can read the supported
2306 * and accessible controllers from a the top-level
2307 * cgroup attribute */
2308
2309 r = cg_get_root_path(&root);
2310 if (r < 0)
2311 return r;
2312
2313 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2314 if (r < 0)
2315 return r;
2316
2317 r = read_one_line_file(path, &controllers);
2318 if (r < 0)
2319 return r;
2320
2321 r = cg_mask_from_string(controllers, &mask);
2322 if (r < 0)
2323 return r;
2324
2325 /* Currently, we support the cpu, memory, io and pids
2326 * controller in the unified hierarchy, mask
2327 * everything else off. */
2328 mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2329
2330 } else {
2331 CGroupController c;
2332
2333 /* In the legacy hierarchy, we check whether which
2334 * hierarchies are mounted. */
2335
2336 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2337 const char *n;
2338
2339 n = cgroup_controller_to_string(c);
2340 if (controller_is_accessible(n) >= 0)
2341 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2342 }
2343 }
2344
2345 *ret = mask;
2346 return 0;
2347 }
2348
2349 int cg_kernel_controllers(Set **ret) {
2350 _cleanup_set_free_free_ Set *controllers = NULL;
2351 _cleanup_fclose_ FILE *f = NULL;
2352 int r;
2353
2354 assert(ret);
2355
2356 /* Determines the full list of kernel-known controllers. Might
2357 * include controllers we don't actually support, arbitrary
2358 * named hierarchies and controllers that aren't currently
2359 * accessible (because not mounted). */
2360
2361 controllers = set_new(&string_hash_ops);
2362 if (!controllers)
2363 return -ENOMEM;
2364
2365 f = fopen("/proc/cgroups", "re");
2366 if (!f) {
2367 if (errno == ENOENT) {
2368 *ret = NULL;
2369 return 0;
2370 }
2371
2372 return -errno;
2373 }
2374
2375 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
2376
2377 /* Ignore the header line */
2378 (void) read_line(f, (size_t) -1, NULL);
2379
2380 for (;;) {
2381 char *controller;
2382 int enabled = 0;
2383
2384 errno = 0;
2385 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2386
2387 if (feof(f))
2388 break;
2389
2390 if (ferror(f) && errno > 0)
2391 return -errno;
2392
2393 return -EBADMSG;
2394 }
2395
2396 if (!enabled) {
2397 free(controller);
2398 continue;
2399 }
2400
2401 if (!cg_controller_is_valid(controller)) {
2402 free(controller);
2403 return -EBADMSG;
2404 }
2405
2406 r = set_consume(controllers, controller);
2407 if (r < 0)
2408 return r;
2409 }
2410
2411 *ret = controllers;
2412 controllers = NULL;
2413
2414 return 0;
2415 }
2416
2417 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2418
2419 /* The hybrid mode was initially implemented in v232 and simply mounted cgroup v2 on /sys/fs/cgroup/systemd. This
2420 * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
2421 * /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mountnbs v2 on /sys/fs/cgroup/unified and maintains
2422 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
2423 *
2424 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep cgroup v2
2425 * process management but disable the compat dual layout, we return %true on
2426 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
2427 */
2428 static thread_local bool unified_systemd_v232;
2429
2430 static int cg_unified_update(void) {
2431
2432 struct statfs fs;
2433
2434 /* Checks if we support the unified hierarchy. Returns an
2435 * error when the cgroup hierarchies aren't mounted yet or we
2436 * have any other trouble determining if the unified hierarchy
2437 * is supported. */
2438
2439 if (unified_cache >= CGROUP_UNIFIED_NONE)
2440 return 0;
2441
2442 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2443 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\" failed: %m");
2444
2445 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2446 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
2447 unified_cache = CGROUP_UNIFIED_ALL;
2448 } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2449 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
2450 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2451 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2452 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2453 unified_systemd_v232 = false;
2454 } else {
2455 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
2456 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
2457
2458 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2459 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2460 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2461 unified_systemd_v232 = true;
2462 } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2463 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2464 unified_cache = CGROUP_UNIFIED_NONE;
2465 } else {
2466 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
2467 (unsigned long long) fs.f_type);
2468 unified_cache = CGROUP_UNIFIED_NONE;
2469 }
2470 }
2471 } else {
2472 log_debug("Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2473 (unsigned long long) fs.f_type);
2474 return -ENOMEDIUM;
2475 }
2476
2477 return 0;
2478 }
2479
2480 int cg_unified_controller(const char *controller) {
2481 int r;
2482
2483 r = cg_unified_update();
2484 if (r < 0)
2485 return r;
2486
2487 if (unified_cache == CGROUP_UNIFIED_NONE)
2488 return false;
2489
2490 if (unified_cache >= CGROUP_UNIFIED_ALL)
2491 return true;
2492
2493 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
2494 }
2495
2496 int cg_all_unified(void) {
2497 int r;
2498
2499 r = cg_unified_update();
2500 if (r < 0)
2501 return r;
2502
2503 return unified_cache >= CGROUP_UNIFIED_ALL;
2504 }
2505
2506 int cg_hybrid_unified(void) {
2507 int r;
2508
2509 r = cg_unified_update();
2510 if (r < 0)
2511 return r;
2512
2513 return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2514 }
2515
2516 int cg_unified_flush(void) {
2517 unified_cache = CGROUP_UNIFIED_UNKNOWN;
2518
2519 return cg_unified_update();
2520 }
2521
2522 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2523 _cleanup_fclose_ FILE *f = NULL;
2524 _cleanup_free_ char *fs = NULL;
2525 CGroupController c;
2526 int r;
2527
2528 assert(p);
2529
2530 if (supported == 0)
2531 return 0;
2532
2533 r = cg_all_unified();
2534 if (r < 0)
2535 return r;
2536 if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */
2537 return 0;
2538
2539 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2540 if (r < 0)
2541 return r;
2542
2543 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2544 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2545 const char *n;
2546
2547 if (!(supported & bit))
2548 continue;
2549
2550 n = cgroup_controller_to_string(c);
2551 {
2552 char s[1 + strlen(n) + 1];
2553
2554 s[0] = mask & bit ? '+' : '-';
2555 strcpy(s + 1, n);
2556
2557 if (!f) {
2558 f = fopen(fs, "we");
2559 if (!f) {
2560 log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
2561 break;
2562 }
2563 }
2564
2565 r = write_string_stream(f, s, 0);
2566 if (r < 0)
2567 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2568 }
2569 }
2570
2571 return 0;
2572 }
2573
2574 bool cg_is_unified_wanted(void) {
2575 static thread_local int wanted = -1;
2576 int r;
2577 bool b;
2578 const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
2579
2580 /* If we have a cached value, return that. */
2581 if (wanted >= 0)
2582 return wanted;
2583
2584 /* If the hierarchy is already mounted, then follow whatever
2585 * was chosen for it. */
2586 if (cg_unified_flush() >= 0)
2587 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
2588
2589 /* Otherwise, let's see what the kernel command line has to say.
2590 * Since checking is expensive, cache a non-error result. */
2591 r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
2592
2593 return (wanted = r > 0 ? b : is_default);
2594 }
2595
2596 bool cg_is_legacy_wanted(void) {
2597 static thread_local int wanted = -1;
2598
2599 /* If we have a cached value, return that. */
2600 if (wanted >= 0)
2601 return wanted;
2602
2603 /* Check if we have cgroups2 already mounted. */
2604 if (cg_unified_flush() >= 0 &&
2605 unified_cache == CGROUP_UNIFIED_ALL)
2606 return (wanted = false);
2607
2608 /* Otherwise, assume that at least partial legacy is wanted,
2609 * since cgroups2 should already be mounted at this point. */
2610 return (wanted = true);
2611 }
2612
2613 bool cg_is_hybrid_wanted(void) {
2614 static thread_local int wanted = -1;
2615 int r;
2616 bool b;
2617 const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2618 /* We default to true if the default is "hybrid", obviously,
2619 * but also when the default is "unified", because if we get
2620 * called, it means that unified hierarchy was not mounted. */
2621
2622 /* If we have a cached value, return that. */
2623 if (wanted >= 0)
2624 return wanted;
2625
2626 /* If the hierarchy is already mounted, then follow whatever
2627 * was chosen for it. */
2628 if (cg_unified_flush() >= 0 &&
2629 unified_cache == CGROUP_UNIFIED_ALL)
2630 return (wanted = false);
2631
2632 /* Otherwise, let's see what the kernel command line has to say.
2633 * Since checking is expensive, cache a non-error result. */
2634 r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
2635
2636 /* The meaning of the kernel option is reversed wrt. to the return value
2637 * of this function, hence the negation. */
2638 return (wanted = r > 0 ? !b : is_default);
2639 }
2640
2641 int cg_weight_parse(const char *s, uint64_t *ret) {
2642 uint64_t u;
2643 int r;
2644
2645 if (isempty(s)) {
2646 *ret = CGROUP_WEIGHT_INVALID;
2647 return 0;
2648 }
2649
2650 r = safe_atou64(s, &u);
2651 if (r < 0)
2652 return r;
2653
2654 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2655 return -ERANGE;
2656
2657 *ret = u;
2658 return 0;
2659 }
2660
2661 const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2662 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2663 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
2664 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2665 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
2666 };
2667
2668 static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2669 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2670 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
2671 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2672 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
2673 };
2674
2675 DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2676
2677 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2678 uint64_t u;
2679 int r;
2680
2681 if (isempty(s)) {
2682 *ret = CGROUP_CPU_SHARES_INVALID;
2683 return 0;
2684 }
2685
2686 r = safe_atou64(s, &u);
2687 if (r < 0)
2688 return r;
2689
2690 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2691 return -ERANGE;
2692
2693 *ret = u;
2694 return 0;
2695 }
2696
2697 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2698 uint64_t u;
2699 int r;
2700
2701 if (isempty(s)) {
2702 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2703 return 0;
2704 }
2705
2706 r = safe_atou64(s, &u);
2707 if (r < 0)
2708 return r;
2709
2710 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2711 return -ERANGE;
2712
2713 *ret = u;
2714 return 0;
2715 }
2716
2717 bool is_cgroup_fs(const struct statfs *s) {
2718 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2719 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2720 }
2721
/* Checks whether the file descriptor 'fd' refers to a file on a cgroup file system (of either version).
 *
 * Returns false if fstatfs() fails. Since the return type is bool, a negative errno value cannot be
 * propagated: it would be converted to true and make every failure look like a cgroup file system. */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2730
2731 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2732 [CGROUP_CONTROLLER_CPU] = "cpu",
2733 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2734 [CGROUP_CONTROLLER_IO] = "io",
2735 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2736 [CGROUP_CONTROLLER_MEMORY] = "memory",
2737 [CGROUP_CONTROLLER_DEVICES] = "devices",
2738 [CGROUP_CONTROLLER_PIDS] = "pids",
2739 };
2740
2741 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);