]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/cgroup-util.c
Add SPDX license identifiers to source files under the LGPL
[thirdparty/systemd.git] / src / basic / cgroup-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <dirent.h>
22 #include <errno.h>
23 #include <ftw.h>
24 #include <limits.h>
25 #include <signal.h>
26 #include <stddef.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <sys/stat.h>
30 #include <sys/statfs.h>
31 #include <sys/types.h>
32 #include <sys/xattr.h>
33 #include <unistd.h>
34
35 #include "alloc-util.h"
36 #include "cgroup-util.h"
37 #include "def.h"
38 #include "dirent-util.h"
39 #include "extract-word.h"
40 #include "fd-util.h"
41 #include "fileio.h"
42 #include "format-util.h"
43 #include "fs-util.h"
44 #include "log.h"
45 #include "login-util.h"
46 #include "macro.h"
47 #include "missing.h"
48 #include "mkdir.h"
49 #include "parse-util.h"
50 #include "path-util.h"
51 #include "proc-cmdline.h"
52 #include "process-util.h"
53 #include "set.h"
54 #include "special.h"
55 #include "stat-util.h"
56 #include "stdio-util.h"
57 #include "string-table.h"
58 #include "string-util.h"
59 #include "strv.h"
60 #include "unit-name.h"
61 #include "user-util.h"
62
63 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
64 _cleanup_free_ char *fs = NULL;
65 FILE *f;
66 int r;
67
68 assert(_f);
69
70 r = cg_get_path(controller, path, "cgroup.procs", &fs);
71 if (r < 0)
72 return r;
73
74 f = fopen(fs, "re");
75 if (!f)
76 return -errno;
77
78 *_f = f;
79 return 0;
80 }
81
82 int cg_read_pid(FILE *f, pid_t *_pid) {
83 unsigned long ul;
84
85 /* Note that the cgroup.procs might contain duplicates! See
86 * cgroups.txt for details. */
87
88 assert(f);
89 assert(_pid);
90
91 errno = 0;
92 if (fscanf(f, "%lu", &ul) != 1) {
93
94 if (feof(f))
95 return 0;
96
97 return errno > 0 ? -errno : -EIO;
98 }
99
100 if (ul <= 0)
101 return -EIO;
102
103 *_pid = (pid_t) ul;
104 return 1;
105 }
106
107 int cg_read_event(
108 const char *controller,
109 const char *path,
110 const char *event,
111 char **val) {
112
113 _cleanup_free_ char *events = NULL, *content = NULL;
114 char *p, *line;
115 int r;
116
117 r = cg_get_path(controller, path, "cgroup.events", &events);
118 if (r < 0)
119 return r;
120
121 r = read_full_file(events, &content, NULL);
122 if (r < 0)
123 return r;
124
125 p = content;
126 while ((line = strsep(&p, "\n"))) {
127 char *key;
128
129 key = strsep(&line, " ");
130 if (!key || !line)
131 return -EINVAL;
132
133 if (strcmp(key, event))
134 continue;
135
136 *val = strdup(line);
137 return 0;
138 }
139
140 return -ENOENT;
141 }
142
/* Reports whether "/proc/self/ns/cgroup" exists. The answer is determined on the first call and
 * then served from a per-thread cache. */
bool cg_ns_supported(void) {
        static thread_local int cached = -1;

        if (cached < 0)
                cached = access("/proc/self/ns/cgroup", F_OK) == 0 ? 1 : 0;

        return cached;
}
156
157 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
158 _cleanup_free_ char *fs = NULL;
159 int r;
160 DIR *d;
161
162 assert(_d);
163
164 /* This is not recursive! */
165
166 r = cg_get_path(controller, path, NULL, &fs);
167 if (r < 0)
168 return r;
169
170 d = opendir(fs);
171 if (!d)
172 return -errno;
173
174 *_d = d;
175 return 0;
176 }
177
178 int cg_read_subgroup(DIR *d, char **fn) {
179 struct dirent *de;
180
181 assert(d);
182 assert(fn);
183
184 FOREACH_DIRENT_ALL(de, d, return -errno) {
185 char *b;
186
187 if (de->d_type != DT_DIR)
188 continue;
189
190 if (dot_or_dot_dot(de->d_name))
191 continue;
192
193 b = strdup(de->d_name);
194 if (!b)
195 return -ENOMEM;
196
197 *fn = b;
198 return 1;
199 }
200
201 return 0;
202 }
203
204 int cg_rmdir(const char *controller, const char *path) {
205 _cleanup_free_ char *p = NULL;
206 int r;
207
208 r = cg_get_path(controller, path, NULL, &p);
209 if (r < 0)
210 return r;
211
212 r = rmdir(p);
213 if (r < 0 && errno != ENOENT)
214 return -errno;
215
216 r = cg_hybrid_unified();
217 if (r < 0)
218 return r;
219 if (r == 0)
220 return 0;
221
222 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
223 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
224 if (r < 0)
225 log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
226 }
227
228 return 0;
229 }
230
231 int cg_kill(
232 const char *controller,
233 const char *path,
234 int sig,
235 CGroupFlags flags,
236 Set *s,
237 cg_kill_log_func_t log_kill,
238 void *userdata) {
239
240 _cleanup_set_free_ Set *allocated_set = NULL;
241 bool done = false;
242 int r, ret = 0;
243 pid_t my_pid;
244
245 assert(sig >= 0);
246
247 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
248 * SIGCONT on SIGKILL. */
249 if (IN_SET(sig, SIGCONT, SIGKILL))
250 flags &= ~CGROUP_SIGCONT;
251
252 /* This goes through the tasks list and kills them all. This
253 * is repeated until no further processes are added to the
254 * tasks list, to properly handle forking processes */
255
256 if (!s) {
257 s = allocated_set = set_new(NULL);
258 if (!s)
259 return -ENOMEM;
260 }
261
262 my_pid = getpid_cached();
263
264 do {
265 _cleanup_fclose_ FILE *f = NULL;
266 pid_t pid = 0;
267 done = true;
268
269 r = cg_enumerate_processes(controller, path, &f);
270 if (r < 0) {
271 if (ret >= 0 && r != -ENOENT)
272 return r;
273
274 return ret;
275 }
276
277 while ((r = cg_read_pid(f, &pid)) > 0) {
278
279 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
280 continue;
281
282 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
283 continue;
284
285 if (log_kill)
286 log_kill(pid, sig, userdata);
287
288 /* If we haven't killed this process yet, kill
289 * it */
290 if (kill(pid, sig) < 0) {
291 if (ret >= 0 && errno != ESRCH)
292 ret = -errno;
293 } else {
294 if (flags & CGROUP_SIGCONT)
295 (void) kill(pid, SIGCONT);
296
297 if (ret == 0)
298 ret = 1;
299 }
300
301 done = false;
302
303 r = set_put(s, PID_TO_PTR(pid));
304 if (r < 0) {
305 if (ret >= 0)
306 return r;
307
308 return ret;
309 }
310 }
311
312 if (r < 0) {
313 if (ret >= 0)
314 return r;
315
316 return ret;
317 }
318
319 /* To avoid racing against processes which fork
320 * quicker than we can kill them we repeat this until
321 * no new pids need to be killed. */
322
323 } while (!done);
324
325 return ret;
326 }
327
328 int cg_kill_recursive(
329 const char *controller,
330 const char *path,
331 int sig,
332 CGroupFlags flags,
333 Set *s,
334 cg_kill_log_func_t log_kill,
335 void *userdata) {
336
337 _cleanup_set_free_ Set *allocated_set = NULL;
338 _cleanup_closedir_ DIR *d = NULL;
339 int r, ret;
340 char *fn;
341
342 assert(path);
343 assert(sig >= 0);
344
345 if (!s) {
346 s = allocated_set = set_new(NULL);
347 if (!s)
348 return -ENOMEM;
349 }
350
351 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
352
353 r = cg_enumerate_subgroups(controller, path, &d);
354 if (r < 0) {
355 if (ret >= 0 && r != -ENOENT)
356 return r;
357
358 return ret;
359 }
360
361 while ((r = cg_read_subgroup(d, &fn)) > 0) {
362 _cleanup_free_ char *p = NULL;
363
364 p = strjoin(path, "/", fn);
365 free(fn);
366 if (!p)
367 return -ENOMEM;
368
369 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
370 if (r != 0 && ret >= 0)
371 ret = r;
372 }
373 if (ret >= 0 && r < 0)
374 ret = r;
375
376 if (flags & CGROUP_REMOVE) {
377 r = cg_rmdir(controller, path);
378 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
379 return r;
380 }
381
382 return ret;
383 }
384
385 int cg_migrate(
386 const char *cfrom,
387 const char *pfrom,
388 const char *cto,
389 const char *pto,
390 CGroupFlags flags) {
391
392 bool done = false;
393 _cleanup_set_free_ Set *s = NULL;
394 int r, ret = 0;
395 pid_t my_pid;
396
397 assert(cfrom);
398 assert(pfrom);
399 assert(cto);
400 assert(pto);
401
402 s = set_new(NULL);
403 if (!s)
404 return -ENOMEM;
405
406 my_pid = getpid_cached();
407
408 do {
409 _cleanup_fclose_ FILE *f = NULL;
410 pid_t pid = 0;
411 done = true;
412
413 r = cg_enumerate_processes(cfrom, pfrom, &f);
414 if (r < 0) {
415 if (ret >= 0 && r != -ENOENT)
416 return r;
417
418 return ret;
419 }
420
421 while ((r = cg_read_pid(f, &pid)) > 0) {
422
423 /* This might do weird stuff if we aren't a
424 * single-threaded program. However, we
425 * luckily know we are not */
426 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
427 continue;
428
429 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
430 continue;
431
432 /* Ignore kernel threads. Since they can only
433 * exist in the root cgroup, we only check for
434 * them there. */
435 if (cfrom &&
436 (isempty(pfrom) || path_equal(pfrom, "/")) &&
437 is_kernel_thread(pid) > 0)
438 continue;
439
440 r = cg_attach(cto, pto, pid);
441 if (r < 0) {
442 if (ret >= 0 && r != -ESRCH)
443 ret = r;
444 } else if (ret == 0)
445 ret = 1;
446
447 done = false;
448
449 r = set_put(s, PID_TO_PTR(pid));
450 if (r < 0) {
451 if (ret >= 0)
452 return r;
453
454 return ret;
455 }
456 }
457
458 if (r < 0) {
459 if (ret >= 0)
460 return r;
461
462 return ret;
463 }
464 } while (!done);
465
466 return ret;
467 }
468
469 int cg_migrate_recursive(
470 const char *cfrom,
471 const char *pfrom,
472 const char *cto,
473 const char *pto,
474 CGroupFlags flags) {
475
476 _cleanup_closedir_ DIR *d = NULL;
477 int r, ret = 0;
478 char *fn;
479
480 assert(cfrom);
481 assert(pfrom);
482 assert(cto);
483 assert(pto);
484
485 ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
486
487 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
488 if (r < 0) {
489 if (ret >= 0 && r != -ENOENT)
490 return r;
491
492 return ret;
493 }
494
495 while ((r = cg_read_subgroup(d, &fn)) > 0) {
496 _cleanup_free_ char *p = NULL;
497
498 p = strjoin(pfrom, "/", fn);
499 free(fn);
500 if (!p)
501 return -ENOMEM;
502
503 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
504 if (r != 0 && ret >= 0)
505 ret = r;
506 }
507
508 if (r < 0 && ret >= 0)
509 ret = r;
510
511 if (flags & CGROUP_REMOVE) {
512 r = cg_rmdir(cfrom, pfrom);
513 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
514 return r;
515 }
516
517 return ret;
518 }
519
520 int cg_migrate_recursive_fallback(
521 const char *cfrom,
522 const char *pfrom,
523 const char *cto,
524 const char *pto,
525 CGroupFlags flags) {
526
527 int r;
528
529 assert(cfrom);
530 assert(pfrom);
531 assert(cto);
532 assert(pto);
533
534 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
535 if (r < 0) {
536 char prefix[strlen(pto) + 1];
537
538 /* This didn't work? Then let's try all prefixes of the destination */
539
540 PATH_FOREACH_PREFIX(prefix, pto) {
541 int q;
542
543 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
544 if (q >= 0)
545 return q;
546 }
547 }
548
549 return r;
550 }
551
552 static const char *controller_to_dirname(const char *controller) {
553 const char *e;
554
555 assert(controller);
556
557 /* Converts a controller name to the directory name below
558 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
559 * just cuts off the name= prefixed used for named
560 * hierarchies, if it is specified. */
561
562 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
563 if (cg_hybrid_unified() > 0)
564 controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
565 else
566 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
567 }
568
569 e = startswith(controller, "name=");
570 if (e)
571 return e;
572
573 return controller;
574 }
575
/* Builds "/sys/fs/cgroup/<dirname>[/<path>][/<suffix>]", where <dirname> is derived from
 * 'controller' via controller_to_dirname(). Empty or NULL 'path' and 'suffix' are left out.
 *
 * Stores the newly allocated string in *fs and returns 0, or returns -ENOMEM. */
static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
        bool has_path, has_suffix;
        const char *dn;
        char *joined;

        assert(fs);
        assert(controller);

        dn = controller_to_dirname(controller);

        has_path = !isempty(path);
        has_suffix = !isempty(suffix);

        if (has_path && has_suffix)
                joined = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
        else if (has_path)
                joined = strjoin("/sys/fs/cgroup/", dn, "/", path);
        else if (has_suffix)
                joined = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
        else
                joined = strappend("/sys/fs/cgroup/", dn);

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
599
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]". Empty or NULL 'path' and 'suffix' are left out.
 *
 * Stores the newly allocated string in *fs and returns 0, or returns -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        bool has_path, has_suffix;
        char *joined;

        assert(fs);

        has_path = !isempty(path);
        has_suffix = !isempty(suffix);

        if (has_path && has_suffix)
                joined = strjoin("/sys/fs/cgroup/", path, "/", suffix);
        else if (has_path)
                joined = strappend("/sys/fs/cgroup/", path);
        else if (has_suffix)
                joined = strappend("/sys/fs/cgroup/", suffix);
        else
                joined = strdup("/sys/fs/cgroup");

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
619
/* Resolves a (controller, path, suffix) triple to a file system path, stored newly allocated in
 * *fs.
 *
 * If 'controller' is NULL the path *below* the controllers is returned, without any prefix: just
 * 'path' and 'suffix' joined, where at least one of the two must be non-NULL (-EINVAL
 * otherwise). If a controller is given it must pass cg_controller_is_valid(); the path is then
 * built below /sys/fs/cgroup in the unified or legacy layout, depending on cg_all_unified().
 *
 * The result is passed through path_kill_slashes(). Returns 0 on success, a negative errno-style
 * code otherwise. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int r;

        assert(fs);

        if (!controller) {
                char *joined;

                if (path && suffix)
                        joined = strjoin(path, "/", suffix);
                else if (path)
                        joined = strdup(path);
                else if (suffix)
                        joined = strdup(suffix);
                else
                        return -EINVAL;

                if (!joined)
                        return -ENOMEM;

                *fs = path_kill_slashes(joined);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified();
        if (r < 0)
                return r;

        r = r > 0 ?
                join_path_unified(path, suffix, fs) :
                join_path_legacy(controller, path, suffix, fs);
        if (r < 0)
                return r;

        path_kill_slashes(*fs);
        return 0;
}
663
664 static int controller_is_accessible(const char *controller) {
665 int r;
666
667 assert(controller);
668
669 /* Checks whether a specific controller is accessible,
670 * i.e. its hierarchy mounted. In the unified hierarchy all
671 * controllers are considered accessible, except for the named
672 * hierarchies */
673
674 if (!cg_controller_is_valid(controller))
675 return -EINVAL;
676
677 r = cg_all_unified();
678 if (r < 0)
679 return r;
680 if (r > 0) {
681 /* We don't support named hierarchies if we are using
682 * the unified hierarchy. */
683
684 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
685 return 0;
686
687 if (startswith(controller, "name="))
688 return -EOPNOTSUPP;
689
690 } else {
691 const char *cc, *dn;
692
693 dn = controller_to_dirname(controller);
694 cc = strjoina("/sys/fs/cgroup/", dn);
695
696 if (laccess(cc, F_OK) < 0)
697 return -errno;
698 }
699
700 return 0;
701 }
702
/* Same as cg_get_path(), but first verifies via controller_is_accessible() that the specified
 * controller is actually accessible, and fails with that function's error if it is not. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);

        return accessible < 0 ? accessible : cg_get_path(controller, path, suffix, fs);
}
716
717 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
718 assert(path);
719 assert(sb);
720 assert(ftwbuf);
721
722 if (typeflag != FTW_DP)
723 return 0;
724
725 if (ftwbuf->level < 1)
726 return 0;
727
728 (void) rmdir(path);
729 return 0;
730 }
731
732 int cg_trim(const char *controller, const char *path, bool delete_root) {
733 _cleanup_free_ char *fs = NULL;
734 int r = 0, q;
735
736 assert(path);
737
738 r = cg_get_path(controller, path, NULL, &fs);
739 if (r < 0)
740 return r;
741
742 errno = 0;
743 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
744 if (errno == ENOENT)
745 r = 0;
746 else if (errno > 0)
747 r = -errno;
748 else
749 r = -EIO;
750 }
751
752 if (delete_root) {
753 if (rmdir(fs) < 0 && errno != ENOENT)
754 return -errno;
755 }
756
757 q = cg_hybrid_unified();
758 if (q < 0)
759 return q;
760 if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
761 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
762 if (q < 0)
763 log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
764 }
765
766 return r;
767 }
768
769 int cg_create(const char *controller, const char *path) {
770 _cleanup_free_ char *fs = NULL;
771 int r;
772
773 r = cg_get_path_and_check(controller, path, NULL, &fs);
774 if (r < 0)
775 return r;
776
777 r = mkdir_parents(fs, 0755);
778 if (r < 0)
779 return r;
780
781 if (mkdir(fs, 0755) < 0) {
782
783 if (errno == EEXIST)
784 return 0;
785
786 return -errno;
787 }
788
789 r = cg_hybrid_unified();
790 if (r < 0)
791 return r;
792
793 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
794 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
795 if (r < 0)
796 log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
797 }
798
799 return 1;
800 }
801
/* Creates the specified cgroup via cg_create() and then attaches 'pid' to it via cg_attach().
 *
 * Returns the result of cg_create() if both steps succeed, or the first error encountered.
 * This does not remove the cgroup on failure. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);

        return attached < 0 ? attached : created;
}
818
819 int cg_attach(const char *controller, const char *path, pid_t pid) {
820 _cleanup_free_ char *fs = NULL;
821 char c[DECIMAL_STR_MAX(pid_t) + 2];
822 int r;
823
824 assert(path);
825 assert(pid >= 0);
826
827 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
828 if (r < 0)
829 return r;
830
831 if (pid == 0)
832 pid = getpid_cached();
833
834 xsprintf(c, PID_FMT "\n", pid);
835
836 r = write_string_file(fs, c, 0);
837 if (r < 0)
838 return r;
839
840 r = cg_hybrid_unified();
841 if (r < 0)
842 return r;
843
844 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
845 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
846 if (r < 0)
847 log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
848 }
849
850 return 0;
851 }
852
853 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
854 int r;
855
856 assert(controller);
857 assert(path);
858 assert(pid >= 0);
859
860 r = cg_attach(controller, path, pid);
861 if (r < 0) {
862 char prefix[strlen(path) + 1];
863
864 /* This didn't work? Then let's try all prefixes of
865 * the destination */
866
867 PATH_FOREACH_PREFIX(prefix, path) {
868 int q;
869
870 q = cg_attach(controller, prefix, pid);
871 if (q >= 0)
872 return q;
873 }
874 }
875
876 return r;
877 }
878
879 int cg_set_group_access(
880 const char *controller,
881 const char *path,
882 mode_t mode,
883 uid_t uid,
884 gid_t gid) {
885
886 _cleanup_free_ char *fs = NULL;
887 int r;
888
889 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
890 return 0;
891
892 if (mode != MODE_INVALID)
893 mode &= 0777;
894
895 r = cg_get_path(controller, path, NULL, &fs);
896 if (r < 0)
897 return r;
898
899 r = chmod_and_chown(fs, mode, uid, gid);
900 if (r < 0)
901 return r;
902
903 r = cg_hybrid_unified();
904 if (r < 0)
905 return r;
906 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
907 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
908 if (r < 0)
909 log_debug_errno(r, "Failed to set group access on compatibility systemd cgroup %s, ignoring: %m", path);
910 }
911
912 return 0;
913 }
914
915 int cg_set_task_access(
916 const char *controller,
917 const char *path,
918 mode_t mode,
919 uid_t uid,
920 gid_t gid) {
921
922 _cleanup_free_ char *fs = NULL;
923 int r;
924
925 assert(path);
926
927 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
928 return 0;
929
930 if (mode != MODE_INVALID)
931 mode &= 0666;
932
933 /* For both the legacy and unified hierarchies, "cgroup.procs" is the main entry point for PIDs */
934 r = cg_get_path(controller, path, "cgroup.procs", &fs);
935 if (r < 0)
936 return r;
937
938 r = chmod_and_chown(fs, mode, uid, gid);
939 if (r < 0)
940 return r;
941
942 r = cg_unified_controller(controller);
943 if (r < 0)
944 return r;
945 if (r == 0) {
946 const char *fn;
947
948 /* Compatibility: on cgroupsv1 always keep values for the legacy files "tasks" and
949 * "cgroup.clone_children" in sync with "cgroup.procs". Since this is legacy stuff, we don't care if
950 * this fails. */
951
952 FOREACH_STRING(fn,
953 "tasks",
954 "cgroup.clone_children") {
955
956 fs = mfree(fs);
957
958 r = cg_get_path(controller, path, fn, &fs);
959 if (r < 0)
960 log_debug_errno(r, "Failed to get path for %s of %s, ignoring: %m", fn, path);
961
962 r = chmod_and_chown(fs, mode, uid, gid);
963 if (r < 0)
964 log_debug_errno(r, "Failed to to change ownership/access mode for %s of %s, ignoring: %m", fn, path);
965 }
966 } else {
967 /* On the unified controller, we want to permit subtree controllers too. */
968
969 fs = mfree(fs);
970 r = cg_get_path(controller, path, "cgroup.subtree_control", &fs);
971 if (r < 0)
972 return r;
973
974 r = chmod_and_chown(fs, mode, uid, gid);
975 if (r < 0)
976 return r;
977 }
978
979 r = cg_hybrid_unified();
980 if (r < 0)
981 return r;
982 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
983 /* Always propagate access mode from unified to legacy controller */
984
985 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
986 if (r < 0)
987 log_debug_errno(r, "Failed to set task access on compatibility systemd cgroup %s, ignoring: %m", path);
988 }
989
990 return 0;
991 }
992
993 int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
994 _cleanup_free_ char *fs = NULL;
995 int r;
996
997 assert(path);
998 assert(name);
999 assert(value || size <= 0);
1000
1001 r = cg_get_path(controller, path, NULL, &fs);
1002 if (r < 0)
1003 return r;
1004
1005 if (setxattr(fs, name, value, size, flags) < 0)
1006 return -errno;
1007
1008 return 0;
1009 }
1010
1011 int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
1012 _cleanup_free_ char *fs = NULL;
1013 ssize_t n;
1014 int r;
1015
1016 assert(path);
1017 assert(name);
1018
1019 r = cg_get_path(controller, path, NULL, &fs);
1020 if (r < 0)
1021 return r;
1022
1023 n = getxattr(fs, name, value, size);
1024 if (n < 0)
1025 return -errno;
1026
1027 return (int) n;
1028 }
1029
1030 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
1031 _cleanup_fclose_ FILE *f = NULL;
1032 char line[LINE_MAX];
1033 const char *fs, *controller_str;
1034 size_t cs = 0;
1035 int unified;
1036
1037 assert(path);
1038 assert(pid >= 0);
1039
1040 if (controller) {
1041 if (!cg_controller_is_valid(controller))
1042 return -EINVAL;
1043 } else
1044 controller = SYSTEMD_CGROUP_CONTROLLER;
1045
1046 unified = cg_unified_controller(controller);
1047 if (unified < 0)
1048 return unified;
1049 if (unified == 0) {
1050 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
1051 controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
1052 else
1053 controller_str = controller;
1054
1055 cs = strlen(controller_str);
1056 }
1057
1058 fs = procfs_file_alloca(pid, "cgroup");
1059 f = fopen(fs, "re");
1060 if (!f)
1061 return errno == ENOENT ? -ESRCH : -errno;
1062
1063 FOREACH_LINE(line, f, return -errno) {
1064 char *e, *p;
1065
1066 truncate_nl(line);
1067
1068 if (unified) {
1069 e = startswith(line, "0:");
1070 if (!e)
1071 continue;
1072
1073 e = strchr(e, ':');
1074 if (!e)
1075 continue;
1076 } else {
1077 char *l;
1078 size_t k;
1079 const char *word, *state;
1080 bool found = false;
1081
1082 l = strchr(line, ':');
1083 if (!l)
1084 continue;
1085
1086 l++;
1087 e = strchr(l, ':');
1088 if (!e)
1089 continue;
1090
1091 *e = 0;
1092 FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
1093 if (k == cs && memcmp(word, controller_str, cs) == 0) {
1094 found = true;
1095 break;
1096 }
1097 }
1098
1099 if (!found)
1100 continue;
1101 }
1102
1103 p = strdup(e + 1);
1104 if (!p)
1105 return -ENOMEM;
1106
1107 *path = p;
1108 return 0;
1109 }
1110
1111 return -ENODATA;
1112 }
1113
1114 int cg_install_release_agent(const char *controller, const char *agent) {
1115 _cleanup_free_ char *fs = NULL, *contents = NULL;
1116 const char *sc;
1117 int r;
1118
1119 assert(agent);
1120
1121 r = cg_unified_controller(controller);
1122 if (r < 0)
1123 return r;
1124 if (r > 0) /* doesn't apply to unified hierarchy */
1125 return -EOPNOTSUPP;
1126
1127 r = cg_get_path(controller, NULL, "release_agent", &fs);
1128 if (r < 0)
1129 return r;
1130
1131 r = read_one_line_file(fs, &contents);
1132 if (r < 0)
1133 return r;
1134
1135 sc = strstrip(contents);
1136 if (isempty(sc)) {
1137 r = write_string_file(fs, agent, 0);
1138 if (r < 0)
1139 return r;
1140 } else if (!path_equal(sc, agent))
1141 return -EEXIST;
1142
1143 fs = mfree(fs);
1144 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1145 if (r < 0)
1146 return r;
1147
1148 contents = mfree(contents);
1149 r = read_one_line_file(fs, &contents);
1150 if (r < 0)
1151 return r;
1152
1153 sc = strstrip(contents);
1154 if (streq(sc, "0")) {
1155 r = write_string_file(fs, "1", 0);
1156 if (r < 0)
1157 return r;
1158
1159 return 1;
1160 }
1161
1162 if (!streq(sc, "1"))
1163 return -EIO;
1164
1165 return 0;
1166 }
1167
1168 int cg_uninstall_release_agent(const char *controller) {
1169 _cleanup_free_ char *fs = NULL;
1170 int r;
1171
1172 r = cg_unified_controller(controller);
1173 if (r < 0)
1174 return r;
1175 if (r > 0) /* Doesn't apply to unified hierarchy */
1176 return -EOPNOTSUPP;
1177
1178 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1179 if (r < 0)
1180 return r;
1181
1182 r = write_string_file(fs, "0", 0);
1183 if (r < 0)
1184 return r;
1185
1186 fs = mfree(fs);
1187
1188 r = cg_get_path(controller, NULL, "release_agent", &fs);
1189 if (r < 0)
1190 return r;
1191
1192 r = write_string_file(fs, "", 0);
1193 if (r < 0)
1194 return r;
1195
1196 return 0;
1197 }
1198
1199 int cg_is_empty(const char *controller, const char *path) {
1200 _cleanup_fclose_ FILE *f = NULL;
1201 pid_t pid;
1202 int r;
1203
1204 assert(path);
1205
1206 r = cg_enumerate_processes(controller, path, &f);
1207 if (r == -ENOENT)
1208 return 1;
1209 if (r < 0)
1210 return r;
1211
1212 r = cg_read_pid(f, &pid);
1213 if (r < 0)
1214 return r;
1215
1216 return r == 0;
1217 }
1218
1219 int cg_is_empty_recursive(const char *controller, const char *path) {
1220 int r;
1221
1222 assert(path);
1223
1224 /* The root cgroup is always populated */
1225 if (controller && (isempty(path) || path_equal(path, "/")))
1226 return false;
1227
1228 r = cg_unified_controller(controller);
1229 if (r < 0)
1230 return r;
1231 if (r > 0) {
1232 _cleanup_free_ char *t = NULL;
1233
1234 /* On the unified hierarchy we can check empty state
1235 * via the "populated" attribute of "cgroup.events". */
1236
1237 r = cg_read_event(controller, path, "populated", &t);
1238 if (r < 0)
1239 return r;
1240
1241 return streq(t, "0");
1242 } else {
1243 _cleanup_closedir_ DIR *d = NULL;
1244 char *fn;
1245
1246 r = cg_is_empty(controller, path);
1247 if (r <= 0)
1248 return r;
1249
1250 r = cg_enumerate_subgroups(controller, path, &d);
1251 if (r == -ENOENT)
1252 return 1;
1253 if (r < 0)
1254 return r;
1255
1256 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1257 _cleanup_free_ char *p = NULL;
1258
1259 p = strjoin(path, "/", fn);
1260 free(fn);
1261 if (!p)
1262 return -ENOMEM;
1263
1264 r = cg_is_empty_recursive(controller, p);
1265 if (r <= 0)
1266 return r;
1267 }
1268 if (r < 0)
1269 return r;
1270
1271 return true;
1272 }
1273 }
1274
1275 int cg_split_spec(const char *spec, char **controller, char **path) {
1276 char *t = NULL, *u = NULL;
1277 const char *e;
1278
1279 assert(spec);
1280
1281 if (*spec == '/') {
1282 if (!path_is_safe(spec))
1283 return -EINVAL;
1284
1285 if (path) {
1286 t = strdup(spec);
1287 if (!t)
1288 return -ENOMEM;
1289
1290 *path = path_kill_slashes(t);
1291 }
1292
1293 if (controller)
1294 *controller = NULL;
1295
1296 return 0;
1297 }
1298
1299 e = strchr(spec, ':');
1300 if (!e) {
1301 if (!cg_controller_is_valid(spec))
1302 return -EINVAL;
1303
1304 if (controller) {
1305 t = strdup(spec);
1306 if (!t)
1307 return -ENOMEM;
1308
1309 *controller = t;
1310 }
1311
1312 if (path)
1313 *path = NULL;
1314
1315 return 0;
1316 }
1317
1318 t = strndup(spec, e-spec);
1319 if (!t)
1320 return -ENOMEM;
1321 if (!cg_controller_is_valid(t)) {
1322 free(t);
1323 return -EINVAL;
1324 }
1325
1326 if (isempty(e+1))
1327 u = NULL;
1328 else {
1329 u = strdup(e+1);
1330 if (!u) {
1331 free(t);
1332 return -ENOMEM;
1333 }
1334
1335 if (!path_is_safe(u) ||
1336 !path_is_absolute(u)) {
1337 free(t);
1338 free(u);
1339 return -EINVAL;
1340 }
1341
1342 path_kill_slashes(u);
1343 }
1344
1345 if (controller)
1346 *controller = t;
1347 else
1348 free(t);
1349
1350 if (path)
1351 *path = u;
1352 else
1353 free(u);
1354
1355 return 0;
1356 }
1357
1358 int cg_mangle_path(const char *path, char **result) {
1359 _cleanup_free_ char *c = NULL, *p = NULL;
1360 char *t;
1361 int r;
1362
1363 assert(path);
1364 assert(result);
1365
1366 /* First, check if it already is a filesystem path */
1367 if (path_startswith(path, "/sys/fs/cgroup")) {
1368
1369 t = strdup(path);
1370 if (!t)
1371 return -ENOMEM;
1372
1373 *result = path_kill_slashes(t);
1374 return 0;
1375 }
1376
1377 /* Otherwise, treat it as cg spec */
1378 r = cg_split_spec(path, &c, &p);
1379 if (r < 0)
1380 return r;
1381
1382 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1383 }
1384
1385 int cg_get_root_path(char **path) {
1386 char *p, *e;
1387 int r;
1388
1389 assert(path);
1390
1391 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1392 if (r < 0)
1393 return r;
1394
1395 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1396 if (!e)
1397 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1398 if (!e)
1399 e = endswith(p, "/system"); /* even more legacy */
1400 if (e)
1401 *e = 0;
1402
1403 *path = p;
1404 return 0;
1405 }
1406
1407 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1408 _cleanup_free_ char *rt = NULL;
1409 char *p;
1410 int r;
1411
1412 assert(cgroup);
1413 assert(shifted);
1414
1415 if (!root) {
1416 /* If the root was specified let's use that, otherwise
1417 * let's determine it from PID 1 */
1418
1419 r = cg_get_root_path(&rt);
1420 if (r < 0)
1421 return r;
1422
1423 root = rt;
1424 }
1425
1426 p = path_startswith(cgroup, root);
1427 if (p && p > cgroup)
1428 *shifted = p - 1;
1429 else
1430 *shifted = cgroup;
1431
1432 return 0;
1433 }
1434
1435 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1436 _cleanup_free_ char *raw = NULL;
1437 const char *c;
1438 int r;
1439
1440 assert(pid >= 0);
1441 assert(cgroup);
1442
1443 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1444 if (r < 0)
1445 return r;
1446
1447 r = cg_shift_path(raw, root, &c);
1448 if (r < 0)
1449 return r;
1450
1451 if (c == raw) {
1452 *cgroup = raw;
1453 raw = NULL;
1454 } else {
1455 char *n;
1456
1457 n = strdup(c);
1458 if (!n)
1459 return -ENOMEM;
1460
1461 *cgroup = n;
1462 }
1463
1464 return 0;
1465 }
1466
1467 int cg_path_decode_unit(const char *cgroup, char **unit) {
1468 char *c, *s;
1469 size_t n;
1470
1471 assert(cgroup);
1472 assert(unit);
1473
1474 n = strcspn(cgroup, "/");
1475 if (n < 3)
1476 return -ENXIO;
1477
1478 c = strndupa(cgroup, n);
1479 c = cg_unescape(c);
1480
1481 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1482 return -ENXIO;
1483
1484 s = strdup(c);
1485 if (!s)
1486 return -ENOMEM;
1487
1488 *unit = s;
1489 return 0;
1490 }
1491
1492 static bool valid_slice_name(const char *p, size_t n) {
1493
1494 if (!p)
1495 return false;
1496
1497 if (n < strlen("x.slice"))
1498 return false;
1499
1500 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1501 char buf[n+1], *c;
1502
1503 memcpy(buf, p, n);
1504 buf[n] = 0;
1505
1506 c = cg_unescape(buf);
1507
1508 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1509 }
1510
1511 return false;
1512 }
1513
/* Skips over all leading slice components of a cgroup path. Returns a pointer to the first path
 * component (with preceding slashes already skipped) that is not a valid slice name; this may be the
 * terminating NUL if the path consists of slices only. */
static const char *skip_slices(const char *p) {
        const char *cursor;

        assert(p);

        cursor = p + strspn(p, "/");

        for (;;) {
                size_t len = strcspn(cursor, "/");

                if (!valid_slice_name(cursor, len))
                        break;

                /* Step over the slice and any slashes that follow it */
                cursor += len;
                cursor += strspn(cursor, "/");
        }

        return cursor;
}
1531
/* Determines the unit a cgroup path belongs to: skips all leading slices and decodes the component
 * that follows as unit name.
 *
 * On success returns 0 and stores a newly allocated unit name in *ret. Returns -ENXIO if no unit
 * name follows the slices or if the decoded name is itself a slice; other negative errors are passed
 * through from cg_path_decode_unit(). */
int cg_path_get_unit(const char *path, char **ret) {
        char *decoded;
        int r;

        assert(path);
        assert(ret);

        r = cg_path_decode_unit(skip_slices(path), &decoded);
        if (r < 0)
                return r;

        if (!endswith(decoded, ".slice")) {
                *ret = decoded;
                return 0;
        }

        /* All slices were skipped already, a slice showing up here is not acceptable */
        free(decoded);
        return -ENXIO;
}
1555
1556 int cg_pid_get_unit(pid_t pid, char **unit) {
1557 _cleanup_free_ char *cgroup = NULL;
1558 int r;
1559
1560 assert(unit);
1561
1562 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1563 if (r < 0)
1564 return r;
1565
1566 return cg_path_get_unit(cgroup, unit);
1567 }
1568
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Leading slashes of p are ignored. If the first path component is of the form
 * "session-<id>.scope" and <id> passes session_id_valid(), a pointer to whatever follows the
 * component (with the separating slashes skipped) is returned. In all other cases, including a NULL
 * or empty p, NULL is returned.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the session ID only, i.e. the component minus prefix (8) and suffix (6) */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure with NULL, not a boolean */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1605
/**
 * Skip user@*.service, but require it to be there.
 *
 * Leading slashes of p are ignored. If the first path component is "user@<uid>.service" and <uid>
 * is accepted by parse_uid(), returns a pointer to what follows the component (separating slashes
 * skipped). Returns NULL otherwise, including for a NULL or empty p.
 */
static const char *skip_user_manager(const char *p) {
        static const char prefix[] = "user@", suffix[] = ".service";
        const size_t prefix_len = sizeof(prefix) - 1, suffix_len = sizeof(suffix) - 1;
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, prefix, prefix_len) != 0 ||
            memcmp(p + n - suffix_len, suffix, suffix_len) != 0)
                return NULL;

        {
                size_t uid_len = n - prefix_len - suffix_len;
                char uid_str[uid_len + 1];

                memcpy(uid_str, p + prefix_len, uid_len);
                uid_str[uid_len] = 0;

                /* User manager service names never collide with the kernel's own names, hence
                 * they are never escaped and no cg_unescape() is needed here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;
        }

        p += n;
        return p + strspn(p, "/");
}
1643
/* Skips the part of a cgroup path that leads to a user's own cgroup subtree: first any slices, then
 * either a user manager service ("user@….service") or, failing that, a session scope
 * ("session-….scope"). Returns a pointer behind the skipped prefix, or NULL if neither of the two is
 * present after the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        after_slices = skip_slices(path);

        /* Prefer the user manager; fall back to the session scope only if it is not there */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1660
/* Determines the user unit a cgroup path belongs to. The user prefix (slices plus user manager
 * service or session scope) is skipped first; the remainder is parsed exactly like a system unit
 * path. Returns -ENXIO if the path has no such prefix, otherwise the result of cg_path_get_unit(). */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1676
1677 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1678 _cleanup_free_ char *cgroup = NULL;
1679 int r;
1680
1681 assert(unit);
1682
1683 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1684 if (r < 0)
1685 return r;
1686
1687 return cg_path_get_user_unit(cgroup, unit);
1688 }
1689
1690 int cg_path_get_machine_name(const char *path, char **machine) {
1691 _cleanup_free_ char *u = NULL;
1692 const char *sl;
1693 int r;
1694
1695 r = cg_path_get_unit(path, &u);
1696 if (r < 0)
1697 return r;
1698
1699 sl = strjoina("/run/systemd/machines/unit:", u);
1700 return readlink_malloc(sl, machine);
1701 }
1702
1703 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1704 _cleanup_free_ char *cgroup = NULL;
1705 int r;
1706
1707 assert(machine);
1708
1709 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1710 if (r < 0)
1711 return r;
1712
1713 return cg_path_get_machine_name(cgroup, machine);
1714 }
1715
1716 int cg_path_get_session(const char *path, char **session) {
1717 _cleanup_free_ char *unit = NULL;
1718 char *start, *end;
1719 int r;
1720
1721 assert(path);
1722
1723 r = cg_path_get_unit(path, &unit);
1724 if (r < 0)
1725 return r;
1726
1727 start = startswith(unit, "session-");
1728 if (!start)
1729 return -ENXIO;
1730 end = endswith(start, ".scope");
1731 if (!end)
1732 return -ENXIO;
1733
1734 *end = 0;
1735 if (!session_id_valid(start))
1736 return -ENXIO;
1737
1738 if (session) {
1739 char *rr;
1740
1741 rr = strdup(start);
1742 if (!rr)
1743 return -ENOMEM;
1744
1745 *session = rr;
1746 }
1747
1748 return 0;
1749 }
1750
1751 int cg_pid_get_session(pid_t pid, char **session) {
1752 _cleanup_free_ char *cgroup = NULL;
1753 int r;
1754
1755 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1756 if (r < 0)
1757 return r;
1758
1759 return cg_path_get_session(cgroup, session);
1760 }
1761
1762 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1763 _cleanup_free_ char *slice = NULL;
1764 char *start, *end;
1765 int r;
1766
1767 assert(path);
1768
1769 r = cg_path_get_slice(path, &slice);
1770 if (r < 0)
1771 return r;
1772
1773 start = startswith(slice, "user-");
1774 if (!start)
1775 return -ENXIO;
1776 end = endswith(start, ".slice");
1777 if (!end)
1778 return -ENXIO;
1779
1780 *end = 0;
1781 if (parse_uid(start, uid) < 0)
1782 return -ENXIO;
1783
1784 return 0;
1785 }
1786
1787 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1788 _cleanup_free_ char *cgroup = NULL;
1789 int r;
1790
1791 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1792 if (r < 0)
1793 return r;
1794
1795 return cg_path_get_owner_uid(cgroup, uid);
1796 }
1797
1798 int cg_path_get_slice(const char *p, char **slice) {
1799 const char *e = NULL;
1800
1801 assert(p);
1802 assert(slice);
1803
1804 /* Finds the right-most slice unit from the beginning, but
1805 * stops before we come to the first non-slice unit. */
1806
1807 for (;;) {
1808 size_t n;
1809
1810 p += strspn(p, "/");
1811
1812 n = strcspn(p, "/");
1813 if (!valid_slice_name(p, n)) {
1814
1815 if (!e) {
1816 char *s;
1817
1818 s = strdup(SPECIAL_ROOT_SLICE);
1819 if (!s)
1820 return -ENOMEM;
1821
1822 *slice = s;
1823 return 0;
1824 }
1825
1826 return cg_path_decode_unit(e, slice);
1827 }
1828
1829 e = p;
1830 p += n;
1831 }
1832 }
1833
1834 int cg_pid_get_slice(pid_t pid, char **slice) {
1835 _cleanup_free_ char *cgroup = NULL;
1836 int r;
1837
1838 assert(slice);
1839
1840 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1841 if (r < 0)
1842 return r;
1843
1844 return cg_path_get_slice(cgroup, slice);
1845 }
1846
/* Determines the user slice of a cgroup path. The user prefix (slices plus user manager service or
 * session scope) is skipped first; the remainder is parsed exactly like a system slice path.
 * Returns -ENXIO if the path has no such prefix, otherwise the result of cg_path_get_slice(). */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1860
1861 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1862 _cleanup_free_ char *cgroup = NULL;
1863 int r;
1864
1865 assert(slice);
1866
1867 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1868 if (r < 0)
1869 return r;
1870
1871 return cg_path_get_user_slice(cgroup, slice);
1872 }
1873
1874 char *cg_escape(const char *p) {
1875 bool need_prefix = false;
1876
1877 /* This implements very minimal escaping for names to be used
1878 * as file names in the cgroup tree: any name which might
1879 * conflict with a kernel name or is prefixed with '_' is
1880 * prefixed with a '_'. That way, when reading cgroup names it
1881 * is sufficient to remove a single prefixing underscore if
1882 * there is one. */
1883
1884 /* The return value of this function (unlike cg_unescape())
1885 * needs free()! */
1886
1887 if (IN_SET(p[0], 0, '_', '.') ||
1888 streq(p, "notify_on_release") ||
1889 streq(p, "release_agent") ||
1890 streq(p, "tasks") ||
1891 startswith(p, "cgroup."))
1892 need_prefix = true;
1893 else {
1894 const char *dot;
1895
1896 dot = strrchr(p, '.');
1897 if (dot) {
1898 CGroupController c;
1899 size_t l = dot - p;
1900
1901 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1902 const char *n;
1903
1904 n = cgroup_controller_to_string(c);
1905
1906 if (l != strlen(n))
1907 continue;
1908
1909 if (memcmp(p, n, l) != 0)
1910 continue;
1911
1912 need_prefix = true;
1913 break;
1914 }
1915 }
1916 }
1917
1918 if (need_prefix)
1919 return strappend("_", p);
1920
1921 return strdup(p);
1922 }
1923
/* Reverses cg_escape(): skips a single leading "_" if there is one.
 *
 * Unlike cg_escape() nothing is allocated here: the returned pointer points into the string that
 * was passed in (either at its first or at its second character) and must not be freed. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1935
1936 #define CONTROLLER_VALID \
1937 DIGITS LETTERS \
1938 "_"
1939
1940 bool cg_controller_is_valid(const char *p) {
1941 const char *t, *s;
1942
1943 if (!p)
1944 return false;
1945
1946 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1947 return true;
1948
1949 s = startswith(p, "name=");
1950 if (s)
1951 p = s;
1952
1953 if (IN_SET(*p, 0, '_'))
1954 return false;
1955
1956 for (t = p; *t; t++)
1957 if (!strchr(CONTROLLER_VALID, *t))
1958 return false;
1959
1960 if (t - p > FILENAME_MAX)
1961 return false;
1962
1963 return true;
1964 }
1965
1966 int cg_slice_to_path(const char *unit, char **ret) {
1967 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1968 const char *dash;
1969 int r;
1970
1971 assert(unit);
1972 assert(ret);
1973
1974 if (streq(unit, SPECIAL_ROOT_SLICE)) {
1975 char *x;
1976
1977 x = strdup("");
1978 if (!x)
1979 return -ENOMEM;
1980 *ret = x;
1981 return 0;
1982 }
1983
1984 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1985 return -EINVAL;
1986
1987 if (!endswith(unit, ".slice"))
1988 return -EINVAL;
1989
1990 r = unit_name_to_prefix(unit, &p);
1991 if (r < 0)
1992 return r;
1993
1994 dash = strchr(p, '-');
1995
1996 /* Don't allow initial dashes */
1997 if (dash == p)
1998 return -EINVAL;
1999
2000 while (dash) {
2001 _cleanup_free_ char *escaped = NULL;
2002 char n[dash - p + sizeof(".slice")];
2003
2004 /* Don't allow trailing or double dashes */
2005 if (IN_SET(dash[1], 0, '-'))
2006 return -EINVAL;
2007
2008 strcpy(stpncpy(n, p, dash - p), ".slice");
2009 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
2010 return -EINVAL;
2011
2012 escaped = cg_escape(n);
2013 if (!escaped)
2014 return -ENOMEM;
2015
2016 if (!strextend(&s, escaped, "/", NULL))
2017 return -ENOMEM;
2018
2019 dash = strchr(dash+1, '-');
2020 }
2021
2022 e = cg_escape(unit);
2023 if (!e)
2024 return -ENOMEM;
2025
2026 if (!strextend(&s, e, NULL))
2027 return -ENOMEM;
2028
2029 *ret = s;
2030 s = NULL;
2031
2032 return 0;
2033 }
2034
2035 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2036 _cleanup_free_ char *p = NULL;
2037 int r;
2038
2039 r = cg_get_path(controller, path, attribute, &p);
2040 if (r < 0)
2041 return r;
2042
2043 return write_string_file(p, value, 0);
2044 }
2045
2046 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2047 _cleanup_free_ char *p = NULL;
2048 int r;
2049
2050 r = cg_get_path(controller, path, attribute, &p);
2051 if (r < 0)
2052 return r;
2053
2054 return read_one_line_file(p, ret);
2055 }
2056
2057 int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
2058 _cleanup_free_ char *filename = NULL, *content = NULL;
2059 char *line, *p;
2060 int i, r;
2061
2062 for (i = 0; keys[i]; i++)
2063 values[i] = NULL;
2064
2065 r = cg_get_path(controller, path, attribute, &filename);
2066 if (r < 0)
2067 return r;
2068
2069 r = read_full_file(filename, &content, NULL);
2070 if (r < 0)
2071 return r;
2072
2073 p = content;
2074 while ((line = strsep(&p, "\n"))) {
2075 char *key;
2076
2077 key = strsep(&line, " ");
2078
2079 for (i = 0; keys[i]; i++) {
2080 if (streq(key, keys[i])) {
2081 values[i] = strdup(line);
2082 break;
2083 }
2084 }
2085 }
2086
2087 for (i = 0; keys[i]; i++) {
2088 if (!values[i]) {
2089 for (i = 0; keys[i]; i++) {
2090 free(values[i]);
2091 values[i] = NULL;
2092 }
2093 return -ENOENT;
2094 }
2095 }
2096
2097 return 0;
2098 }
2099
2100 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2101 CGroupController c;
2102 int r;
2103
2104 /* This one will create a cgroup in our private tree, but also
2105 * duplicate it in the trees specified in mask, and remove it
2106 * in all others */
2107
2108 /* First create the cgroup in our own hierarchy. */
2109 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2110 if (r < 0)
2111 return r;
2112
2113 /* If we are in the unified hierarchy, we are done now */
2114 r = cg_all_unified();
2115 if (r < 0)
2116 return r;
2117 if (r > 0)
2118 return 0;
2119
2120 /* Otherwise, do the same in the other hierarchies */
2121 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2122 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2123 const char *n;
2124
2125 n = cgroup_controller_to_string(c);
2126
2127 if (mask & bit)
2128 (void) cg_create(n, path);
2129 else if (supported & bit)
2130 (void) cg_trim(n, path, true);
2131 }
2132
2133 return 0;
2134 }
2135
2136 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2137 CGroupController c;
2138 int r;
2139
2140 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
2141 if (r < 0)
2142 return r;
2143
2144 r = cg_all_unified();
2145 if (r < 0)
2146 return r;
2147 if (r > 0)
2148 return 0;
2149
2150 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2151 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2152 const char *p = NULL;
2153
2154 if (!(supported & bit))
2155 continue;
2156
2157 if (path_callback)
2158 p = path_callback(bit, userdata);
2159
2160 if (!p)
2161 p = path;
2162
2163 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2164 }
2165
2166 return 0;
2167 }
2168
2169 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2170 Iterator i;
2171 void *pidp;
2172 int r = 0;
2173
2174 SET_FOREACH(pidp, pids, i) {
2175 pid_t pid = PTR_TO_PID(pidp);
2176 int q;
2177
2178 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2179 if (q < 0 && r >= 0)
2180 r = q;
2181 }
2182
2183 return r;
2184 }
2185
2186 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2187 CGroupController c;
2188 int r = 0, q;
2189
2190 if (!path_equal(from, to)) {
2191 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2192 if (r < 0)
2193 return r;
2194 }
2195
2196 q = cg_all_unified();
2197 if (q < 0)
2198 return q;
2199 if (q > 0)
2200 return r;
2201
2202 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2203 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2204 const char *p = NULL;
2205
2206 if (!(supported & bit))
2207 continue;
2208
2209 if (to_callback)
2210 p = to_callback(bit, userdata);
2211
2212 if (!p)
2213 p = to;
2214
2215 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2216 }
2217
2218 return 0;
2219 }
2220
2221 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2222 CGroupController c;
2223 int r, q;
2224
2225 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2226 if (r < 0)
2227 return r;
2228
2229 q = cg_all_unified();
2230 if (q < 0)
2231 return q;
2232 if (q > 0)
2233 return r;
2234
2235 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2236 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2237
2238 if (!(supported & bit))
2239 continue;
2240
2241 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2242 }
2243
2244 return 0;
2245 }
2246
2247 int cg_mask_to_string(CGroupMask mask, char **ret) {
2248 _cleanup_free_ char *s = NULL;
2249 size_t n = 0, allocated = 0;
2250 bool space = false;
2251 CGroupController c;
2252
2253 assert(ret);
2254
2255 if (mask == 0) {
2256 *ret = NULL;
2257 return 0;
2258 }
2259
2260 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2261 const char *k;
2262 size_t l;
2263
2264 if (!(mask & CGROUP_CONTROLLER_TO_MASK(c)))
2265 continue;
2266
2267 k = cgroup_controller_to_string(c);
2268 l = strlen(k);
2269
2270 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
2271 return -ENOMEM;
2272
2273 if (space)
2274 s[n] = ' ';
2275 memcpy(s + n + space, k, l);
2276 n += space + l;
2277
2278 space = true;
2279 }
2280
2281 assert(s);
2282
2283 s[n] = 0;
2284 *ret = s;
2285 s = NULL;
2286
2287 return 0;
2288 }
2289
2290 int cg_mask_from_string(const char *value, CGroupMask *mask) {
2291 assert(mask);
2292 assert(value);
2293
2294 for (;;) {
2295 _cleanup_free_ char *n = NULL;
2296 CGroupController v;
2297 int r;
2298
2299 r = extract_first_word(&value, &n, NULL, 0);
2300 if (r < 0)
2301 return r;
2302 if (r == 0)
2303 break;
2304
2305 v = cgroup_controller_from_string(n);
2306 if (v < 0)
2307 continue;
2308
2309 *mask |= CGROUP_CONTROLLER_TO_MASK(v);
2310 }
2311 return 0;
2312 }
2313
2314 int cg_mask_supported(CGroupMask *ret) {
2315 CGroupMask mask = 0;
2316 int r;
2317
2318 /* Determines the mask of supported cgroup controllers. Only
2319 * includes controllers we can make sense of and that are
2320 * actually accessible. */
2321
2322 r = cg_all_unified();
2323 if (r < 0)
2324 return r;
2325 if (r > 0) {
2326 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2327
2328 /* In the unified hierarchy we can read the supported
2329 * and accessible controllers from a the top-level
2330 * cgroup attribute */
2331
2332 r = cg_get_root_path(&root);
2333 if (r < 0)
2334 return r;
2335
2336 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2337 if (r < 0)
2338 return r;
2339
2340 r = read_one_line_file(path, &controllers);
2341 if (r < 0)
2342 return r;
2343
2344 r = cg_mask_from_string(controllers, &mask);
2345 if (r < 0)
2346 return r;
2347
2348 /* Currently, we support the cpu, memory, io and pids
2349 * controller in the unified hierarchy, mask
2350 * everything else off. */
2351 mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2352
2353 } else {
2354 CGroupController c;
2355
2356 /* In the legacy hierarchy, we check whether which
2357 * hierarchies are mounted. */
2358
2359 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2360 const char *n;
2361
2362 n = cgroup_controller_to_string(c);
2363 if (controller_is_accessible(n) >= 0)
2364 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2365 }
2366 }
2367
2368 *ret = mask;
2369 return 0;
2370 }
2371
2372 int cg_kernel_controllers(Set *controllers) {
2373 _cleanup_fclose_ FILE *f = NULL;
2374 int r;
2375
2376 assert(controllers);
2377
2378 /* Determines the full list of kernel-known controllers. Might
2379 * include controllers we don't actually support, arbitrary
2380 * named hierarchies and controllers that aren't currently
2381 * accessible (because not mounted). */
2382
2383 f = fopen("/proc/cgroups", "re");
2384 if (!f) {
2385 if (errno == ENOENT)
2386 return 0;
2387 return -errno;
2388 }
2389
2390 /* Ignore the header line */
2391 (void) read_line(f, (size_t) -1, NULL);
2392
2393 for (;;) {
2394 char *controller;
2395 int enabled = 0;
2396
2397 errno = 0;
2398 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2399
2400 if (feof(f))
2401 break;
2402
2403 if (ferror(f) && errno > 0)
2404 return -errno;
2405
2406 return -EBADMSG;
2407 }
2408
2409 if (!enabled) {
2410 free(controller);
2411 continue;
2412 }
2413
2414 if (!cg_controller_is_valid(controller)) {
2415 free(controller);
2416 return -EBADMSG;
2417 }
2418
2419 r = set_consume(controllers, controller);
2420 if (r < 0)
2421 return r;
2422 }
2423
2424 return 0;
2425 }
2426
/* Per-thread cache of the hierarchy type detected by cg_unified_update(). It remains at
 * CGROUP_UNIFIED_UNKNOWN until a detection succeeded, and is reset to that value by
 * cg_unified_flush() in order to force a new detection. */
static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;

/* The hybrid mode was initially implemented in v232 and simply mounted cgroup v2 on /sys/fs/cgroup/systemd.  This
 * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
 * /sys/fs/cgroup/systemd.  From v233 and on, the hybrid mode mounts v2 on /sys/fs/cgroup/unified and maintains
 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
 *
 * To keep live upgrade working, we detect and support v232 layout.  When v232 layout is detected, to keep cgroup v2
 * process management but disable the compat dual layout, we return %true on
 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
 */
static thread_local bool unified_systemd_v232;
2439
2440 static int cg_unified_update(void) {
2441
2442 struct statfs fs;
2443
2444 /* Checks if we support the unified hierarchy. Returns an
2445 * error when the cgroup hierarchies aren't mounted yet or we
2446 * have any other trouble determining if the unified hierarchy
2447 * is supported. */
2448
2449 if (unified_cache >= CGROUP_UNIFIED_NONE)
2450 return 0;
2451
2452 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2453 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\" failed: %m");
2454
2455 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2456 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
2457 unified_cache = CGROUP_UNIFIED_ALL;
2458 } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2459 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
2460 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2461 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2462 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2463 unified_systemd_v232 = false;
2464 } else {
2465 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
2466 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
2467
2468 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2469 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2470 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2471 unified_systemd_v232 = true;
2472 } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2473 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2474 unified_cache = CGROUP_UNIFIED_NONE;
2475 } else {
2476 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
2477 (unsigned long long) fs.f_type);
2478 unified_cache = CGROUP_UNIFIED_NONE;
2479 }
2480 }
2481 } else {
2482 log_debug("Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2483 (unsigned long long) fs.f_type);
2484 return -ENOMEDIUM;
2485 }
2486
2487 return 0;
2488 }
2489
2490 int cg_unified_controller(const char *controller) {
2491 int r;
2492
2493 r = cg_unified_update();
2494 if (r < 0)
2495 return r;
2496
2497 if (unified_cache == CGROUP_UNIFIED_NONE)
2498 return false;
2499
2500 if (unified_cache >= CGROUP_UNIFIED_ALL)
2501 return true;
2502
2503 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
2504 }
2505
2506 int cg_all_unified(void) {
2507 int r;
2508
2509 r = cg_unified_update();
2510 if (r < 0)
2511 return r;
2512
2513 return unified_cache >= CGROUP_UNIFIED_ALL;
2514 }
2515
2516 int cg_hybrid_unified(void) {
2517 int r;
2518
2519 r = cg_unified_update();
2520 if (r < 0)
2521 return r;
2522
2523 return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2524 }
2525
2526 int cg_unified_flush(void) {
2527 unified_cache = CGROUP_UNIFIED_UNKNOWN;
2528
2529 return cg_unified_update();
2530 }
2531
2532 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2533 _cleanup_free_ char *fs = NULL;
2534 CGroupController c;
2535 int r;
2536
2537 assert(p);
2538
2539 if (supported == 0)
2540 return 0;
2541
2542 r = cg_all_unified();
2543 if (r < 0)
2544 return r;
2545 if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */
2546 return 0;
2547
2548 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2549 if (r < 0)
2550 return r;
2551
2552 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2553 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2554 const char *n;
2555
2556 if (!(supported & bit))
2557 continue;
2558
2559 n = cgroup_controller_to_string(c);
2560 {
2561 char s[1 + strlen(n) + 1];
2562
2563 s[0] = mask & bit ? '+' : '-';
2564 strcpy(s + 1, n);
2565
2566 r = write_string_file(fs, s, 0);
2567 if (r < 0)
2568 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2569 }
2570 }
2571
2572 return 0;
2573 }
2574
2575 bool cg_is_unified_wanted(void) {
2576 static thread_local int wanted = -1;
2577 int r;
2578 bool b;
2579 const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
2580
2581 /* If we have a cached value, return that. */
2582 if (wanted >= 0)
2583 return wanted;
2584
2585 /* If the hierarchy is already mounted, then follow whatever
2586 * was chosen for it. */
2587 if (cg_unified_flush() >= 0)
2588 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
2589
2590 /* Otherwise, let's see what the kernel command line has to say.
2591 * Since checking is expensive, cache a non-error result. */
2592 r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
2593
2594 return (wanted = r > 0 ? b : is_default);
2595 }
2596
2597 bool cg_is_legacy_wanted(void) {
2598 static thread_local int wanted = -1;
2599
2600 /* If we have a cached value, return that. */
2601 if (wanted >= 0)
2602 return wanted;
2603
2604 /* Check if we have cgroups2 already mounted. */
2605 if (cg_unified_flush() >= 0 &&
2606 unified_cache == CGROUP_UNIFIED_ALL)
2607 return (wanted = false);
2608
2609 /* Otherwise, assume that at least partial legacy is wanted,
2610 * since cgroups2 should already be mounted at this point. */
2611 return (wanted = true);
2612 }
2613
2614 bool cg_is_hybrid_wanted(void) {
2615 static thread_local int wanted = -1;
2616 int r;
2617 bool b;
2618 const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2619 /* We default to true if the default is "hybrid", obviously,
2620 * but also when the default is "unified", because if we get
2621 * called, it means that unified hierarchy was not mounted. */
2622
2623 /* If we have a cached value, return that. */
2624 if (wanted >= 0)
2625 return wanted;
2626
2627 /* If the hierarchy is already mounted, then follow whatever
2628 * was chosen for it. */
2629 if (cg_unified_flush() >= 0 &&
2630 unified_cache == CGROUP_UNIFIED_ALL)
2631 return (wanted = false);
2632
2633 /* Otherwise, let's see what the kernel command line has to say.
2634 * Since checking is expensive, cache a non-error result. */
2635 r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
2636
2637 /* The meaning of the kernel option is reversed wrt. to the return value
2638 * of this function, hence the negation. */
2639 return (wanted = r > 0 ? !b : is_default);
2640 }
2641
2642 int cg_weight_parse(const char *s, uint64_t *ret) {
2643 uint64_t u;
2644 int r;
2645
2646 if (isempty(s)) {
2647 *ret = CGROUP_WEIGHT_INVALID;
2648 return 0;
2649 }
2650
2651 r = safe_atou64(s, &u);
2652 if (r < 0)
2653 return r;
2654
2655 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2656 return -ERANGE;
2657
2658 *ret = u;
2659 return 0;
2660 }
2661
/* Default value for each IO limit type, indexed by CGroupIOLimitType: all four limits default to
 * CGROUP_LIMIT_MAX. */
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]    = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WBPS_MAX]    = CGROUP_LIMIT_MAX,
        [CGROUP_IO_RIOPS_MAX]   = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WIOPS_MAX]   = CGROUP_LIMIT_MAX,
};

/* String form of each IO limit type, indexed by CGroupIOLimitType. */
static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]    = "IOReadBandwidthMax",
        [CGROUP_IO_WBPS_MAX]    = "IOWriteBandwidthMax",
        [CGROUP_IO_RIOPS_MAX]   = "IOReadIOPSMax",
        [CGROUP_IO_WIOPS_MAX]   = "IOWriteIOPSMax",
};

/* NOTE(review): presumably generates cgroup_io_limit_type_to_string() and
 * cgroup_io_limit_type_from_string() on top of the table above, analogous to the cgroup_controller
 * lookup functions used in this file. Confirm against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2677
2678 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2679 uint64_t u;
2680 int r;
2681
2682 if (isempty(s)) {
2683 *ret = CGROUP_CPU_SHARES_INVALID;
2684 return 0;
2685 }
2686
2687 r = safe_atou64(s, &u);
2688 if (r < 0)
2689 return r;
2690
2691 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2692 return -ERANGE;
2693
2694 *ret = u;
2695 return 0;
2696 }
2697
2698 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2699 uint64_t u;
2700 int r;
2701
2702 if (isempty(s)) {
2703 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2704 return 0;
2705 }
2706
2707 r = safe_atou64(s, &u);
2708 if (r < 0)
2709 return r;
2710
2711 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2712 return -ERANGE;
2713
2714 *ret = u;
2715 return 0;
2716 }
2717
2718 bool is_cgroup_fs(const struct statfs *s) {
2719 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2720 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2721 }
2722
/* Tells whether the file descriptor "fd" refers to something on a cgroup file system (legacy or
 * unified, see is_cgroup_fs()).
 *
 * As the return type is boolean there is no way to report an error to the caller: if fstatfs()
 * fails, false is returned, since we could not establish that this is a cgroup file system. */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        /* A negative errno would be converted to "true" here, hence say "false" explicitly */
        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2731
2732 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2733 [CGROUP_CONTROLLER_CPU] = "cpu",
2734 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2735 [CGROUP_CONTROLLER_IO] = "io",
2736 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2737 [CGROUP_CONTROLLER_MEMORY] = "memory",
2738 [CGROUP_CONTROLLER_DEVICES] = "devices",
2739 [CGROUP_CONTROLLER_PIDS] = "pids",
2740 };
2741
2742 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);