]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/cgroup-util.c
Merge pull request #7469 from kinvolk/dongsu/nspawn-netns
[thirdparty/systemd.git] / src / basic / cgroup-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <dirent.h>
22 #include <errno.h>
23 #include <ftw.h>
24 #include <limits.h>
25 #include <signal.h>
26 #include <stddef.h>
27 #include <stdio_ext.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/stat.h>
31 #include <sys/statfs.h>
32 #include <sys/types.h>
33 #include <sys/xattr.h>
34 #include <unistd.h>
35
36 #include "alloc-util.h"
37 #include "cgroup-util.h"
38 #include "def.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
41 #include "fd-util.h"
42 #include "fileio.h"
43 #include "format-util.h"
44 #include "fs-util.h"
45 #include "log.h"
46 #include "login-util.h"
47 #include "macro.h"
48 #include "missing.h"
49 #include "mkdir.h"
50 #include "parse-util.h"
51 #include "path-util.h"
52 #include "proc-cmdline.h"
53 #include "process-util.h"
54 #include "set.h"
55 #include "special.h"
56 #include "stat-util.h"
57 #include "stdio-util.h"
58 #include "string-table.h"
59 #include "string-util.h"
60 #include "strv.h"
61 #include "unit-name.h"
62 #include "user-util.h"
63
64 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
65 _cleanup_free_ char *fs = NULL;
66 FILE *f;
67 int r;
68
69 assert(_f);
70
71 r = cg_get_path(controller, path, "cgroup.procs", &fs);
72 if (r < 0)
73 return r;
74
75 f = fopen(fs, "re");
76 if (!f)
77 return -errno;
78
79 *_f = f;
80 return 0;
81 }
82
/* Reads the next PID from an open "cgroup.procs" stream.
 *
 * Returns 1 and stores the PID in *_pid if one was read, 0 on end of file, and a negative errno-style
 * value on failure. -EIO is returned for input that is not a number, for zero, and for values that cannot
 * be represented as a positive pid_t. *_pid is only written on success.
 *
 * Note that the cgroup.procs might contain duplicates! See cgroups.txt for details. */
int cg_read_pid(FILE *f, pid_t *_pid) {
        unsigned long ul;
        pid_t pid;

        assert(f);
        assert(_pid);

        errno = 0;
        if (fscanf(f, "%lu", &ul) != 1) {

                if (feof(f))
                        return 0;

                return errno > 0 ? -errno : -EIO;
        }

        /* Refuse zero as well as anything that would be truncated or turn negative when converted to
         * pid_t (this includes negative input, which "%lu" accepts and wraps around), instead of
         * silently handing a bogus PID to the caller. */
        pid = (pid_t) ul;
        if (pid <= 0 || (unsigned long) pid != ul)
                return -EIO;

        *_pid = pid;
        return 1;
}
107
108 int cg_read_event(
109 const char *controller,
110 const char *path,
111 const char *event,
112 char **val) {
113
114 _cleanup_free_ char *events = NULL, *content = NULL;
115 char *p, *line;
116 int r;
117
118 r = cg_get_path(controller, path, "cgroup.events", &events);
119 if (r < 0)
120 return r;
121
122 r = read_full_file(events, &content, NULL);
123 if (r < 0)
124 return r;
125
126 p = content;
127 while ((line = strsep(&p, "\n"))) {
128 char *key;
129
130 key = strsep(&line, " ");
131 if (!key || !line)
132 return -EINVAL;
133
134 if (strcmp(key, event))
135 continue;
136
137 *val = strdup(line);
138 return 0;
139 }
140
141 return -ENOENT;
142 }
143
/* Reports whether /proc/self/ns/cgroup is present, which is taken as the indication that cgroup
 * namespaces are available. The result is determined on the first call and cached per thread. */
bool cg_ns_supported(void) {
        static thread_local int cached = -1;

        if (cached < 0)
                cached = access("/proc/self/ns/cgroup", F_OK) == 0 ? 1 : 0;

        return cached;
}
157
158 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
159 _cleanup_free_ char *fs = NULL;
160 int r;
161 DIR *d;
162
163 assert(_d);
164
165 /* This is not recursive! */
166
167 r = cg_get_path(controller, path, NULL, &fs);
168 if (r < 0)
169 return r;
170
171 d = opendir(fs);
172 if (!d)
173 return -errno;
174
175 *_d = d;
176 return 0;
177 }
178
179 int cg_read_subgroup(DIR *d, char **fn) {
180 struct dirent *de;
181
182 assert(d);
183 assert(fn);
184
185 FOREACH_DIRENT_ALL(de, d, return -errno) {
186 char *b;
187
188 if (de->d_type != DT_DIR)
189 continue;
190
191 if (dot_or_dot_dot(de->d_name))
192 continue;
193
194 b = strdup(de->d_name);
195 if (!b)
196 return -ENOMEM;
197
198 *fn = b;
199 return 1;
200 }
201
202 return 0;
203 }
204
205 int cg_rmdir(const char *controller, const char *path) {
206 _cleanup_free_ char *p = NULL;
207 int r;
208
209 r = cg_get_path(controller, path, NULL, &p);
210 if (r < 0)
211 return r;
212
213 r = rmdir(p);
214 if (r < 0 && errno != ENOENT)
215 return -errno;
216
217 r = cg_hybrid_unified();
218 if (r < 0)
219 return r;
220 if (r == 0)
221 return 0;
222
223 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
224 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
225 if (r < 0)
226 log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
227 }
228
229 return 0;
230 }
231
/* Sends signal 'sig' to all processes listed in the "cgroup.procs" attribute of the specified cgroup
 * (not recursing into child cgroups).
 *
 * 's' is used to remember the PIDs already handled, so that no PID is signalled twice; if NULL, a
 * temporary set is allocated for the duration of the call. If 'log_kill' is non-NULL it is invoked with
 * (pid, sig, userdata) right before each PID is signalled. Flags evaluated here: CGROUP_IGNORE_SELF (skip
 * our own PID) and CGROUP_SIGCONT (follow up each successful kill with SIGCONT).
 *
 * Returns 1 if at least one signal was delivered, 0 if none was, and a negative errno-style value on
 * failure. A cgroup that does not exist (-ENOENT) and processes that are already gone (ESRCH) are not
 * treated as errors. */
int cg_kill(
                const char *controller,
                const char *path,
                int sig,
                CGroupFlags flags,
                Set *s,
                cg_kill_log_func_t log_kill,
                void *userdata) {

        _cleanup_set_free_ Set *allocated_set = NULL;
        bool done = false;
        int r, ret = 0;
        pid_t my_pid;

        assert(sig >= 0);

        /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
         * SIGCONT on SIGKILL. */
        if (IN_SET(sig, SIGCONT, SIGKILL))
                flags &= ~CGROUP_SIGCONT;

        /* This goes through the tasks list and kills them all. This
         * is repeated until no further processes are added to the
         * tasks list, to properly handle forking processes */

        if (!s) {
                s = allocated_set = set_new(NULL);
                if (!s)
                        return -ENOMEM;
        }

        my_pid = getpid_cached();

        do {
                _cleanup_fclose_ FILE *f = NULL;
                pid_t pid = 0;
                done = true;

                /* Re-open the process list on every pass, so that newly appeared PIDs are seen */
                r = cg_enumerate_processes(controller, path, &f);
                if (r < 0) {
                        /* An earlier error in 'ret' takes precedence; a vanished cgroup is not an error */
                        if (ret >= 0 && r != -ENOENT)
                                return r;

                        return ret;
                }

                while ((r = cg_read_pid(f, &pid)) > 0) {

                        if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
                                continue;

                        /* Already handled in this or an earlier pass (or by the caller) */
                        if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
                                continue;

                        if (log_kill)
                                log_kill(pid, sig, userdata);

                        /* If we haven't killed this process yet, kill
                         * it */
                        if (kill(pid, sig) < 0) {
                                /* Remember only the first real error; ESRCH just means the process is gone */
                                if (ret >= 0 && errno != ESRCH)
                                        ret = -errno;
                        } else {
                                if (flags & CGROUP_SIGCONT)
                                        (void) kill(pid, SIGCONT);

                                if (ret == 0)
                                        ret = 1;
                        }

                        /* We saw a new PID, hence another pass is needed */
                        done = false;

                        r = set_put(s, PID_TO_PTR(pid));
                        if (r < 0) {
                                if (ret >= 0)
                                        return r;

                                return ret;
                        }
                }

                /* cg_read_pid() failed */
                if (r < 0) {
                        if (ret >= 0)
                                return r;

                        return ret;
                }

                /* To avoid racing against processes which fork
                 * quicker than we can kill them we repeat this until
                 * no new pids need to be killed. */

        } while (!done);

        return ret;
}
328
329 int cg_kill_recursive(
330 const char *controller,
331 const char *path,
332 int sig,
333 CGroupFlags flags,
334 Set *s,
335 cg_kill_log_func_t log_kill,
336 void *userdata) {
337
338 _cleanup_set_free_ Set *allocated_set = NULL;
339 _cleanup_closedir_ DIR *d = NULL;
340 int r, ret;
341 char *fn;
342
343 assert(path);
344 assert(sig >= 0);
345
346 if (!s) {
347 s = allocated_set = set_new(NULL);
348 if (!s)
349 return -ENOMEM;
350 }
351
352 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
353
354 r = cg_enumerate_subgroups(controller, path, &d);
355 if (r < 0) {
356 if (ret >= 0 && r != -ENOENT)
357 return r;
358
359 return ret;
360 }
361
362 while ((r = cg_read_subgroup(d, &fn)) > 0) {
363 _cleanup_free_ char *p = NULL;
364
365 p = strjoin(path, "/", fn);
366 free(fn);
367 if (!p)
368 return -ENOMEM;
369
370 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
371 if (r != 0 && ret >= 0)
372 ret = r;
373 }
374 if (ret >= 0 && r < 0)
375 ret = r;
376
377 if (flags & CGROUP_REMOVE) {
378 r = cg_rmdir(controller, path);
379 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
380 return r;
381 }
382
383 return ret;
384 }
385
/* Moves all processes listed in the "cgroup.procs" attribute of cgroup cfrom:pfrom into cgroup cto:pto
 * (not recursing into child cgroups), by calling cg_attach() for each PID.
 *
 * The process list is re-read until a pass finds no PID that was not handled before. If
 * CGROUP_IGNORE_SELF is set in 'flags' our own PID is left alone.
 *
 * Returns 1 if at least one process was attached, 0 if none was, and a negative errno-style value on
 * failure. A source cgroup that does not exist (-ENOENT) and processes that are already gone (-ESRCH) are
 * not treated as errors. */
int cg_migrate(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        bool done = false;
        _cleanup_set_free_ Set *s = NULL;
        int r, ret = 0;
        pid_t my_pid;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        /* Keeps track of the PIDs we already dealt with */
        s = set_new(NULL);
        if (!s)
                return -ENOMEM;

        my_pid = getpid_cached();

        do {
                _cleanup_fclose_ FILE *f = NULL;
                pid_t pid = 0;
                done = true;

                /* Re-open the process list on every pass, so that newly appeared PIDs are seen */
                r = cg_enumerate_processes(cfrom, pfrom, &f);
                if (r < 0) {
                        /* An earlier error in 'ret' takes precedence; a vanished cgroup is not an error */
                        if (ret >= 0 && r != -ENOENT)
                                return r;

                        return ret;
                }

                while ((r = cg_read_pid(f, &pid)) > 0) {

                        /* This might do weird stuff if we aren't a
                         * single-threaded program. However, we
                         * luckily know we are not */
                        if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
                                continue;

                        /* Already handled in this or an earlier pass */
                        if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
                                continue;

                        /* Ignore kernel threads. Since they can only
                         * exist in the root cgroup, we only check for
                         * them there. */
                        if (cfrom &&
                            (isempty(pfrom) || path_equal(pfrom, "/")) &&
                            is_kernel_thread(pid) > 0)
                                continue;

                        r = cg_attach(cto, pto, pid);
                        if (r < 0) {
                                /* Remember only the first real error; -ESRCH just means the process is gone */
                                if (ret >= 0 && r != -ESRCH)
                                        ret = r;
                        } else if (ret == 0)
                                ret = 1;

                        /* We saw a new PID, hence another pass is needed */
                        done = false;

                        r = set_put(s, PID_TO_PTR(pid));
                        if (r < 0) {
                                if (ret >= 0)
                                        return r;

                                return ret;
                        }
                }

                /* cg_read_pid() failed */
                if (r < 0) {
                        if (ret >= 0)
                                return r;

                        return ret;
                }
        } while (!done);

        return ret;
}
469
470 int cg_migrate_recursive(
471 const char *cfrom,
472 const char *pfrom,
473 const char *cto,
474 const char *pto,
475 CGroupFlags flags) {
476
477 _cleanup_closedir_ DIR *d = NULL;
478 int r, ret = 0;
479 char *fn;
480
481 assert(cfrom);
482 assert(pfrom);
483 assert(cto);
484 assert(pto);
485
486 ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
487
488 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
489 if (r < 0) {
490 if (ret >= 0 && r != -ENOENT)
491 return r;
492
493 return ret;
494 }
495
496 while ((r = cg_read_subgroup(d, &fn)) > 0) {
497 _cleanup_free_ char *p = NULL;
498
499 p = strjoin(pfrom, "/", fn);
500 free(fn);
501 if (!p)
502 return -ENOMEM;
503
504 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
505 if (r != 0 && ret >= 0)
506 ret = r;
507 }
508
509 if (r < 0 && ret >= 0)
510 ret = r;
511
512 if (flags & CGROUP_REMOVE) {
513 r = cg_rmdir(cfrom, pfrom);
514 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
515 return r;
516 }
517
518 return ret;
519 }
520
521 int cg_migrate_recursive_fallback(
522 const char *cfrom,
523 const char *pfrom,
524 const char *cto,
525 const char *pto,
526 CGroupFlags flags) {
527
528 int r;
529
530 assert(cfrom);
531 assert(pfrom);
532 assert(cto);
533 assert(pto);
534
535 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
536 if (r < 0) {
537 char prefix[strlen(pto) + 1];
538
539 /* This didn't work? Then let's try all prefixes of the destination */
540
541 PATH_FOREACH_PREFIX(prefix, pto) {
542 int q;
543
544 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
545 if (q >= 0)
546 return q;
547 }
548 }
549
550 return r;
551 }
552
553 static const char *controller_to_dirname(const char *controller) {
554 const char *e;
555
556 assert(controller);
557
558 /* Converts a controller name to the directory name below
559 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
560 * just cuts off the name= prefixed used for named
561 * hierarchies, if it is specified. */
562
563 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
564 if (cg_hybrid_unified() > 0)
565 controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
566 else
567 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
568 }
569
570 e = startswith(controller, "name=");
571 if (e)
572 return e;
573
574 return controller;
575 }
576
577 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
578 const char *dn;
579 char *t = NULL;
580
581 assert(fs);
582 assert(controller);
583
584 dn = controller_to_dirname(controller);
585
586 if (isempty(path) && isempty(suffix))
587 t = strappend("/sys/fs/cgroup/", dn);
588 else if (isempty(path))
589 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
590 else if (isempty(suffix))
591 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
592 else
593 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
594 if (!t)
595 return -ENOMEM;
596
597 *fs = t;
598 return 0;
599 }
600
/* Generates "/sys/fs/cgroup[/<path>][/<suffix>]". Empty or NULL 'path' and 'suffix' are left out. Returns
 * 0 and stores the newly allocated string in *fs, or -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        char *joined;

        assert(fs);

        if (isempty(path))
                joined = isempty(suffix) ?
                        strdup("/sys/fs/cgroup") :
                        strappend("/sys/fs/cgroup/", suffix);
        else
                joined = isempty(suffix) ?
                        strappend("/sys/fs/cgroup/", path) :
                        strjoin("/sys/fs/cgroup/", path, "/", suffix);

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
620
/* Generates the file system path for a cgroup, optionally with an attribute name ('suffix') appended.
 *
 * With a controller, the path is built below /sys/fs/cgroup, in the unified or the legacy layout
 * depending on cg_all_unified(). Without a controller, only 'path' and 'suffix' are joined, without any
 * prefix; at least one of the two has to be given then.
 *
 * Returns 0 and stores the newly allocated path, passed through path_kill_slashes(), in *fs. Returns
 * -EINVAL for an invalid controller name or if nothing was specified at all, -ENOMEM if memory is
 * exhausted, or the error from cg_all_unified(). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        char *t;
        int r;

        assert(fs);

        if (controller) {
                if (!cg_controller_is_valid(controller))
                        return -EINVAL;

                r = cg_all_unified();
                if (r < 0)
                        return r;

                r = r > 0 ?
                        join_path_unified(path, suffix, fs) :
                        join_path_legacy(controller, path, suffix, fs);
                if (r < 0)
                        return r;

                path_kill_slashes(*fs);
                return 0;
        }

        /* If no controller is specified, we return the path *below* the controllers, without any
         * prefix. */

        if (!path && !suffix)
                return -EINVAL;

        if (path && suffix)
                t = strjoin(path, "/", suffix);
        else
                t = strdup(suffix ? suffix : path);
        if (!t)
                return -ENOMEM;

        *fs = path_kill_slashes(t);
        return 0;
}
664
665 static int controller_is_accessible(const char *controller) {
666 int r;
667
668 assert(controller);
669
670 /* Checks whether a specific controller is accessible,
671 * i.e. its hierarchy mounted. In the unified hierarchy all
672 * controllers are considered accessible, except for the named
673 * hierarchies */
674
675 if (!cg_controller_is_valid(controller))
676 return -EINVAL;
677
678 r = cg_all_unified();
679 if (r < 0)
680 return r;
681 if (r > 0) {
682 /* We don't support named hierarchies if we are using
683 * the unified hierarchy. */
684
685 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
686 return 0;
687
688 if (startswith(controller, "name="))
689 return -EOPNOTSUPP;
690
691 } else {
692 const char *cc, *dn;
693
694 dn = controller_to_dirname(controller);
695 cc = strjoina("/sys/fs/cgroup/", dn);
696
697 if (laccess(cc, F_OK) < 0)
698 return -errno;
699 }
700
701 return 0;
702 }
703
/* Same as cg_get_path(), but first verifies with controller_is_accessible() that the specified
 * controller is actually accessible, and fails with that function's error if it is not. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int r;

        assert(controller);
        assert(fs);

        r = controller_is_accessible(controller);
        return r < 0 ? r : cg_get_path(controller, path, suffix, fs);
}
717
718 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
719 assert(path);
720 assert(sb);
721 assert(ftwbuf);
722
723 if (typeflag != FTW_DP)
724 return 0;
725
726 if (ftwbuf->level < 1)
727 return 0;
728
729 (void) rmdir(path);
730 return 0;
731 }
732
733 int cg_trim(const char *controller, const char *path, bool delete_root) {
734 _cleanup_free_ char *fs = NULL;
735 int r = 0, q;
736
737 assert(path);
738
739 r = cg_get_path(controller, path, NULL, &fs);
740 if (r < 0)
741 return r;
742
743 errno = 0;
744 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
745 if (errno == ENOENT)
746 r = 0;
747 else if (errno > 0)
748 r = -errno;
749 else
750 r = -EIO;
751 }
752
753 if (delete_root) {
754 if (rmdir(fs) < 0 && errno != ENOENT)
755 return -errno;
756 }
757
758 q = cg_hybrid_unified();
759 if (q < 0)
760 return q;
761 if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
762 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
763 if (q < 0)
764 log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
765 }
766
767 return r;
768 }
769
770 int cg_create(const char *controller, const char *path) {
771 _cleanup_free_ char *fs = NULL;
772 int r;
773
774 r = cg_get_path_and_check(controller, path, NULL, &fs);
775 if (r < 0)
776 return r;
777
778 r = mkdir_parents(fs, 0755);
779 if (r < 0)
780 return r;
781
782 if (mkdir(fs, 0755) < 0) {
783
784 if (errno == EEXIST)
785 return 0;
786
787 return -errno;
788 }
789
790 r = cg_hybrid_unified();
791 if (r < 0)
792 return r;
793
794 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
795 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
796 if (r < 0)
797 log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
798 }
799
800 return 1;
801 }
802
/* Creates the specified cgroup with cg_create() and then moves 'pid' into it with cg_attach().
 *
 * Returns the result of cg_create() on success, or the negative error of whichever step failed. This
 * does not remove the cgroup again if attaching fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, r;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        r = cg_attach(controller, path, pid);
        return r < 0 ? r : created;
}
819
820 int cg_attach(const char *controller, const char *path, pid_t pid) {
821 _cleanup_free_ char *fs = NULL;
822 char c[DECIMAL_STR_MAX(pid_t) + 2];
823 int r;
824
825 assert(path);
826 assert(pid >= 0);
827
828 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
829 if (r < 0)
830 return r;
831
832 if (pid == 0)
833 pid = getpid_cached();
834
835 xsprintf(c, PID_FMT "\n", pid);
836
837 r = write_string_file(fs, c, 0);
838 if (r < 0)
839 return r;
840
841 r = cg_hybrid_unified();
842 if (r < 0)
843 return r;
844
845 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
846 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
847 if (r < 0)
848 log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
849 }
850
851 return 0;
852 }
853
854 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
855 int r;
856
857 assert(controller);
858 assert(path);
859 assert(pid >= 0);
860
861 r = cg_attach(controller, path, pid);
862 if (r < 0) {
863 char prefix[strlen(path) + 1];
864
865 /* This didn't work? Then let's try all prefixes of
866 * the destination */
867
868 PATH_FOREACH_PREFIX(prefix, path) {
869 int q;
870
871 q = cg_attach(controller, prefix, pid);
872 if (q >= 0)
873 return q;
874 }
875 }
876
877 return r;
878 }
879
/* Changes mode and ownership of the specified cgroup directory (mode 0755) and of a fixed set of its
 * attribute files (mode 0644) via chmod_and_chown().
 *
 * Which attributes are touched depends on what cg_unified_controller() reports for the controller. For
 * attributes marked "fatal" a failure aborts the operation; for the others it is only logged at debug
 * level. Nothing is done if both 'uid' and 'gid' are invalid.
 *
 * If the controller is SYSTEMD_CGROUP_CONTROLLER and cg_hybrid_unified() reports > 0, the same is applied
 * to SYSTEMD_CGROUP_CONTROLLER_LEGACY, where failures are only logged.
 *
 * Returns 0 on success, a negative errno-style value otherwise. */
int cg_set_access(
                const char *controller,
                const char *path,
                uid_t uid,
                gid_t gid) {

        struct Attribute {
                const char *name;
                bool fatal; /* if true, failing to adjust this attribute fails the whole call */
        };

        /* cgroupsv1, aka legacy/non-unified */
        static const struct Attribute legacy_attributes[] = {
                { "cgroup.procs",           true  },
                { "tasks",                  false },
                { "cgroup.clone_children",  false },
                {}, /* end marker, name == NULL */
        };

        /* cgroupsv2, aka unified */
        static const struct Attribute unified_attributes[] = {
                { "cgroup.procs",           true  },
                { "cgroup.subtree_control", true  },
                { "cgroup.threads",         false },
                {}, /* end marker, name == NULL */
        };

        /* Indexed by the result of cg_unified_controller() */
        static const struct Attribute* const attributes[] = {
                [false] = legacy_attributes,
                [true]  = unified_attributes,
        };

        _cleanup_free_ char *fs = NULL;
        const struct Attribute *i;
        int r, unified;

        assert(path);

        /* Nothing to change? */
        if (uid == UID_INVALID && gid == GID_INVALID)
                return 0;

        unified = cg_unified_controller(controller);
        if (unified < 0)
                return unified;

        /* Configure access to the cgroup itself */
        r = cg_get_path(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        r = chmod_and_chown(fs, 0755, uid, gid);
        if (r < 0)
                return r;

        /* Configure access to the cgroup's attributes */
        for (i = attributes[unified]; i->name; i++) {
                /* Release the path of the previous iteration before generating the next one */
                fs = mfree(fs);

                r = cg_get_path(controller, path, i->name, &fs);
                if (r < 0)
                        return r;

                r = chmod_and_chown(fs, 0644, uid, gid);
                if (r < 0) {
                        if (i->fatal)
                                return r;

                        log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
                }
        }

        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                r = cg_hybrid_unified();
                if (r < 0)
                        return r;
                if (r > 0) {
                        /* Always propagate access mode from unified to legacy controller */
                        r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
                        if (r < 0)
                                log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
                }
        }

        return 0;
}
965
966 int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
967 _cleanup_free_ char *fs = NULL;
968 int r;
969
970 assert(path);
971 assert(name);
972 assert(value || size <= 0);
973
974 r = cg_get_path(controller, path, NULL, &fs);
975 if (r < 0)
976 return r;
977
978 if (setxattr(fs, name, value, size, flags) < 0)
979 return -errno;
980
981 return 0;
982 }
983
984 int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
985 _cleanup_free_ char *fs = NULL;
986 ssize_t n;
987 int r;
988
989 assert(path);
990 assert(name);
991
992 r = cg_get_path(controller, path, NULL, &fs);
993 if (r < 0)
994 return r;
995
996 n = getxattr(fs, name, value, size);
997 if (n < 0)
998 return -errno;
999
1000 return (int) n;
1001 }
1002
/* Determines the cgroup path of process 'pid' for the specified controller by parsing the process'
 * "cgroup" file in procfs. If 'controller' is NULL, SYSTEMD_CGROUP_CONTROLLER is used.
 *
 * Each line is expected in the form "<id>:<controller>[,<controller>...]:<path>". If
 * cg_unified_controller() reports the controller as unified, the line starting with "0:" is used;
 * otherwise the first line whose controller list contains the controller is.
 *
 * Returns 0 and stores a newly allocated copy of the path in *path (to be freed by the caller), with a
 * trailing " (deleted)" cut off. Returns -EINVAL for an invalid controller name, -ESRCH if the file does
 * not exist, -ENODATA if no line matched, -ENOMEM if memory is exhausted, or another negative errno-style
 * value. */
int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
        _cleanup_fclose_ FILE *f = NULL;
        char line[LINE_MAX];
        const char *fs, *controller_str;
        size_t cs = 0;
        int unified;

        assert(path);
        assert(pid >= 0);

        if (controller) {
                if (!cg_controller_is_valid(controller))
                        return -EINVAL;
        } else
                controller = SYSTEMD_CGROUP_CONTROLLER;

        unified = cg_unified_controller(controller);
        if (unified < 0)
                return unified;
        if (unified == 0) {
                /* This is the name we look for in the controller list of each line. (controller_str and cs
                 * are only used in the non-unified case below.) */
                if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
                        controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
                else
                        controller_str = controller;

                cs = strlen(controller_str);
        }

        fs = procfs_file_alloca(pid, "cgroup");
        f = fopen(fs, "re");
        if (!f)
                return errno == ENOENT ? -ESRCH : -errno;

        (void) __fsetlocking(f, FSETLOCKING_BYCALLER);

        FOREACH_LINE(line, f, return -errno) {
                char *e, *p;

                truncate_nl(line);

                if (unified) {
                        e = startswith(line, "0:");
                        if (!e)
                                continue;

                        /* Find the second colon; the path follows it */
                        e = strchr(e, ':');
                        if (!e)
                                continue;
                } else {
                        char *l;
                        size_t k;
                        const char *word, *state;
                        bool found = false;

                        /* l: start of the controller list, right after the first colon */
                        l = strchr(line, ':');
                        if (!l)
                                continue;

                        l++;
                        /* e: the second colon, i.e. the end of the controller list */
                        e = strchr(l, ':');
                        if (!e)
                                continue;

                        /* Terminate the controller list, so that only it is split into words */
                        *e = 0;
                        FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
                                if (k == cs && memcmp(word, controller_str, cs) == 0) {
                                        found = true;
                                        break;
                                }
                        }

                        if (!found)
                                continue;
                }

                /* In both cases e now points to the separator right before the path */
                p = strdup(e + 1);
                if (!p)
                        return -ENOMEM;

                /* Truncate suffix indicating the process is a zombie */
                e = endswith(p, " (deleted)");
                if (e)
                        *e = 0;

                *path = p;
                return 0;
        }

        return -ENODATA;
}
1093
1094 int cg_install_release_agent(const char *controller, const char *agent) {
1095 _cleanup_free_ char *fs = NULL, *contents = NULL;
1096 const char *sc;
1097 int r;
1098
1099 assert(agent);
1100
1101 r = cg_unified_controller(controller);
1102 if (r < 0)
1103 return r;
1104 if (r > 0) /* doesn't apply to unified hierarchy */
1105 return -EOPNOTSUPP;
1106
1107 r = cg_get_path(controller, NULL, "release_agent", &fs);
1108 if (r < 0)
1109 return r;
1110
1111 r = read_one_line_file(fs, &contents);
1112 if (r < 0)
1113 return r;
1114
1115 sc = strstrip(contents);
1116 if (isempty(sc)) {
1117 r = write_string_file(fs, agent, 0);
1118 if (r < 0)
1119 return r;
1120 } else if (!path_equal(sc, agent))
1121 return -EEXIST;
1122
1123 fs = mfree(fs);
1124 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1125 if (r < 0)
1126 return r;
1127
1128 contents = mfree(contents);
1129 r = read_one_line_file(fs, &contents);
1130 if (r < 0)
1131 return r;
1132
1133 sc = strstrip(contents);
1134 if (streq(sc, "0")) {
1135 r = write_string_file(fs, "1", 0);
1136 if (r < 0)
1137 return r;
1138
1139 return 1;
1140 }
1141
1142 if (!streq(sc, "1"))
1143 return -EIO;
1144
1145 return 0;
1146 }
1147
1148 int cg_uninstall_release_agent(const char *controller) {
1149 _cleanup_free_ char *fs = NULL;
1150 int r;
1151
1152 r = cg_unified_controller(controller);
1153 if (r < 0)
1154 return r;
1155 if (r > 0) /* Doesn't apply to unified hierarchy */
1156 return -EOPNOTSUPP;
1157
1158 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1159 if (r < 0)
1160 return r;
1161
1162 r = write_string_file(fs, "0", 0);
1163 if (r < 0)
1164 return r;
1165
1166 fs = mfree(fs);
1167
1168 r = cg_get_path(controller, NULL, "release_agent", &fs);
1169 if (r < 0)
1170 return r;
1171
1172 r = write_string_file(fs, "", 0);
1173 if (r < 0)
1174 return r;
1175
1176 return 0;
1177 }
1178
1179 int cg_is_empty(const char *controller, const char *path) {
1180 _cleanup_fclose_ FILE *f = NULL;
1181 pid_t pid;
1182 int r;
1183
1184 assert(path);
1185
1186 r = cg_enumerate_processes(controller, path, &f);
1187 if (r == -ENOENT)
1188 return 1;
1189 if (r < 0)
1190 return r;
1191
1192 r = cg_read_pid(f, &pid);
1193 if (r < 0)
1194 return r;
1195
1196 return r == 0;
1197 }
1198
1199 int cg_is_empty_recursive(const char *controller, const char *path) {
1200 int r;
1201
1202 assert(path);
1203
1204 /* The root cgroup is always populated */
1205 if (controller && (isempty(path) || path_equal(path, "/")))
1206 return false;
1207
1208 r = cg_unified_controller(controller);
1209 if (r < 0)
1210 return r;
1211 if (r > 0) {
1212 _cleanup_free_ char *t = NULL;
1213
1214 /* On the unified hierarchy we can check empty state
1215 * via the "populated" attribute of "cgroup.events". */
1216
1217 r = cg_read_event(controller, path, "populated", &t);
1218 if (r < 0)
1219 return r;
1220
1221 return streq(t, "0");
1222 } else {
1223 _cleanup_closedir_ DIR *d = NULL;
1224 char *fn;
1225
1226 r = cg_is_empty(controller, path);
1227 if (r <= 0)
1228 return r;
1229
1230 r = cg_enumerate_subgroups(controller, path, &d);
1231 if (r == -ENOENT)
1232 return 1;
1233 if (r < 0)
1234 return r;
1235
1236 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1237 _cleanup_free_ char *p = NULL;
1238
1239 p = strjoin(path, "/", fn);
1240 free(fn);
1241 if (!p)
1242 return -ENOMEM;
1243
1244 r = cg_is_empty_recursive(controller, p);
1245 if (r <= 0)
1246 return r;
1247 }
1248 if (r < 0)
1249 return r;
1250
1251 return true;
1252 }
1253 }
1254
/* Splits a cgroup specification into its controller and path parts. Three forms are understood:
 *
 *   "/some/path"        → no controller, path only
 *   "controller"        → controller only, no path
 *   "controller:/path"  → both (the path may be empty, and is then returned as NULL)
 *
 * 'controller' and 'path' may each be NULL if the caller is not interested in that part. On success 0 is
 * returned and the parts are stored as newly allocated strings (or NULL if not present). Returns -EINVAL
 * if the controller name is invalid or the path is not normalized (or, in the third form, not absolute),
 * and -ENOMEM if memory is exhausted. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *c = NULL, *p = NULL;
        const char *colon;
        int r;

        assert(spec);

        if (*spec == '/') {
                /* Path only */
                if (!path_is_normalized(spec))
                        return -EINVAL;

                if (path) {
                        p = strdup(spec);
                        if (!p)
                                return -ENOMEM;

                        *path = path_kill_slashes(p);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        colon = strchr(spec, ':');
        if (!colon) {
                /* Controller only */
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        c = strdup(spec);
                        if (!c)
                                return -ENOMEM;

                        *controller = c;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Controller and path */
        c = strndup(spec, colon - spec);
        if (!c)
                return -ENOMEM;
        if (!cg_controller_is_valid(c)) {
                r = -EINVAL;
                goto fail;
        }

        if (!isempty(colon + 1)) {
                p = strdup(colon + 1);
                if (!p) {
                        r = -ENOMEM;
                        goto fail;
                }

                if (!path_is_normalized(p) ||
                    !path_is_absolute(p)) {
                        r = -EINVAL;
                        goto fail;
                }

                path_kill_slashes(p);
        }

        /* Hand over what the caller asked for, release the rest */
        if (controller)
                *controller = c;
        else
                free(c);

        if (path)
                *path = p;
        else
                free(p);

        return 0;

fail:
        free(c);
        free(p);
        return r;
}
1337
1338 int cg_mangle_path(const char *path, char **result) {
1339 _cleanup_free_ char *c = NULL, *p = NULL;
1340 char *t;
1341 int r;
1342
1343 assert(path);
1344 assert(result);
1345
1346 /* First, check if it already is a filesystem path */
1347 if (path_startswith(path, "/sys/fs/cgroup")) {
1348
1349 t = strdup(path);
1350 if (!t)
1351 return -ENOMEM;
1352
1353 *result = path_kill_slashes(t);
1354 return 0;
1355 }
1356
1357 /* Otherwise, treat it as cg spec */
1358 r = cg_split_spec(path, &c, &p);
1359 if (r < 0)
1360 return r;
1361
1362 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1363 }
1364
1365 int cg_get_root_path(char **path) {
1366 char *p, *e;
1367 int r;
1368
1369 assert(path);
1370
1371 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1372 if (r < 0)
1373 return r;
1374
1375 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1376 if (!e)
1377 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1378 if (!e)
1379 e = endswith(p, "/system"); /* even more legacy */
1380 if (e)
1381 *e = 0;
1382
1383 *path = p;
1384 return 0;
1385 }
1386
1387 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1388 _cleanup_free_ char *rt = NULL;
1389 char *p;
1390 int r;
1391
1392 assert(cgroup);
1393 assert(shifted);
1394
1395 if (!root) {
1396 /* If the root was specified let's use that, otherwise
1397 * let's determine it from PID 1 */
1398
1399 r = cg_get_root_path(&rt);
1400 if (r < 0)
1401 return r;
1402
1403 root = rt;
1404 }
1405
1406 p = path_startswith(cgroup, root);
1407 if (p && p > cgroup)
1408 *shifted = p - 1;
1409 else
1410 *shifted = cgroup;
1411
1412 return 0;
1413 }
1414
1415 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1416 _cleanup_free_ char *raw = NULL;
1417 const char *c;
1418 int r;
1419
1420 assert(pid >= 0);
1421 assert(cgroup);
1422
1423 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1424 if (r < 0)
1425 return r;
1426
1427 r = cg_shift_path(raw, root, &c);
1428 if (r < 0)
1429 return r;
1430
1431 if (c == raw) {
1432 *cgroup = raw;
1433 raw = NULL;
1434 } else {
1435 char *n;
1436
1437 n = strdup(c);
1438 if (!n)
1439 return -ENOMEM;
1440
1441 *cgroup = n;
1442 }
1443
1444 return 0;
1445 }
1446
1447 int cg_path_decode_unit(const char *cgroup, char **unit) {
1448 char *c, *s;
1449 size_t n;
1450
1451 assert(cgroup);
1452 assert(unit);
1453
1454 n = strcspn(cgroup, "/");
1455 if (n < 3)
1456 return -ENXIO;
1457
1458 c = strndupa(cgroup, n);
1459 c = cg_unescape(c);
1460
1461 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1462 return -ENXIO;
1463
1464 s = strdup(c);
1465 if (!s)
1466 return -ENOMEM;
1467
1468 *unit = s;
1469 return 0;
1470 }
1471
1472 static bool valid_slice_name(const char *p, size_t n) {
1473
1474 if (!p)
1475 return false;
1476
1477 if (n < STRLEN("x.slice"))
1478 return false;
1479
1480 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1481 char buf[n+1], *c;
1482
1483 memcpy(buf, p, n);
1484 buf[n] = 0;
1485
1486 c = cg_unescape(buf);
1487
1488 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1489 }
1490
1491 return false;
1492 }
1493
/* Skips over all leading slice components of the path. Returns a pointer to the beginning of the first
 * component (leading slashes already skipped) that valid_slice_name() does not accept. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t length;

                component = p + strspn(p, "/");
                length = strcspn(component, "/");

                if (!valid_slice_name(component, length))
                        return component;

                p = component + length;
        }
}
1511
1512 int cg_path_get_unit(const char *path, char **ret) {
1513 const char *e;
1514 char *unit;
1515 int r;
1516
1517 assert(path);
1518 assert(ret);
1519
1520 e = skip_slices(path);
1521
1522 r = cg_path_decode_unit(e, &unit);
1523 if (r < 0)
1524 return r;
1525
1526 /* We skipped over the slices, don't accept any now */
1527 if (endswith(unit, ".slice")) {
1528 free(unit);
1529 return -ENXIO;
1530 }
1531
1532 *ret = unit;
1533 return 0;
1534 }
1535
1536 int cg_pid_get_unit(pid_t pid, char **unit) {
1537 _cleanup_free_ char *cgroup = NULL;
1538 int r;
1539
1540 assert(unit);
1541
1542 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1543 if (r < 0)
1544 return r;
1545
1546 return cg_path_get_unit(cgroup, unit);
1547 }
1548
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to what follows the "session-<id>.scope" component (with the slashes
 * after it skipped), or NULL if 'p' is empty or its first component is not such a scope with
 * a valid session id.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Holds the session id, i.e. the component without the 8 byte prefix and 6 byte suffix */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure with NULL rather than 'false' */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1585
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the "user@<uid>.service" component (with the slashes
 * after it skipped), or NULL if 'p' is empty or its first component is not such a service
 * with a parsable UID.
 */
static const char *skip_user_manager(const char *p) {
        const char *component;
        size_t len, uid_len;

        if (isempty(p))
                return NULL;

        component = p + strspn(p, "/");
        len = strcspn(component, "/");

        if (len < STRLEN("user@x.service"))
                return NULL;
        if (memcmp(component, "user@", 5) != 0)
                return NULL;
        if (memcmp(component + len - 8, ".service", 8) != 0)
                return NULL;

        /* What remains between the 5 byte prefix and the 8 byte suffix is the UID */
        uid_len = len - 5 - 8;

        {
                char uid_str[uid_len + 1];

                memcpy(uid_str, component + 5, uid_len);
                uid_str[uid_len] = 0;

                /* User manager services never need unescaping, since they
                 * cannot conflict with the kernel's own names, hence
                 * cg_unescape() is not called here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;
        }

        component += len;

        return component + strspn(component, "/");
}
1623
/* Skips the part of a cgroup path that leads to a user's units: first any slices, then
 * either the user manager service or, if there is none, the session scope. Returns NULL if
 * neither of the two follows the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        after_slices = skip_slices(path);

        /* Prefer the user manager if it is in the path, fall back to the session otherwise */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1640
/* Extracts the user unit name from a cgroup path. Returns -ENXIO if the path does not carry
 * a user manager or session prefix; otherwise the result of cg_path_get_unit() on the rest. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the prefix the path looks like the one of a system unit, hence
         * the same parser is used. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1656
1657 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1658 _cleanup_free_ char *cgroup = NULL;
1659 int r;
1660
1661 assert(unit);
1662
1663 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1664 if (r < 0)
1665 return r;
1666
1667 return cg_path_get_user_unit(cgroup, unit);
1668 }
1669
1670 int cg_path_get_machine_name(const char *path, char **machine) {
1671 _cleanup_free_ char *u = NULL;
1672 const char *sl;
1673 int r;
1674
1675 r = cg_path_get_unit(path, &u);
1676 if (r < 0)
1677 return r;
1678
1679 sl = strjoina("/run/systemd/machines/unit:", u);
1680 return readlink_malloc(sl, machine);
1681 }
1682
1683 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1684 _cleanup_free_ char *cgroup = NULL;
1685 int r;
1686
1687 assert(machine);
1688
1689 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1690 if (r < 0)
1691 return r;
1692
1693 return cg_path_get_machine_name(cgroup, machine);
1694 }
1695
1696 int cg_path_get_session(const char *path, char **session) {
1697 _cleanup_free_ char *unit = NULL;
1698 char *start, *end;
1699 int r;
1700
1701 assert(path);
1702
1703 r = cg_path_get_unit(path, &unit);
1704 if (r < 0)
1705 return r;
1706
1707 start = startswith(unit, "session-");
1708 if (!start)
1709 return -ENXIO;
1710 end = endswith(start, ".scope");
1711 if (!end)
1712 return -ENXIO;
1713
1714 *end = 0;
1715 if (!session_id_valid(start))
1716 return -ENXIO;
1717
1718 if (session) {
1719 char *rr;
1720
1721 rr = strdup(start);
1722 if (!rr)
1723 return -ENOMEM;
1724
1725 *session = rr;
1726 }
1727
1728 return 0;
1729 }
1730
1731 int cg_pid_get_session(pid_t pid, char **session) {
1732 _cleanup_free_ char *cgroup = NULL;
1733 int r;
1734
1735 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1736 if (r < 0)
1737 return r;
1738
1739 return cg_path_get_session(cgroup, session);
1740 }
1741
1742 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1743 _cleanup_free_ char *slice = NULL;
1744 char *start, *end;
1745 int r;
1746
1747 assert(path);
1748
1749 r = cg_path_get_slice(path, &slice);
1750 if (r < 0)
1751 return r;
1752
1753 start = startswith(slice, "user-");
1754 if (!start)
1755 return -ENXIO;
1756 end = endswith(start, ".slice");
1757 if (!end)
1758 return -ENXIO;
1759
1760 *end = 0;
1761 if (parse_uid(start, uid) < 0)
1762 return -ENXIO;
1763
1764 return 0;
1765 }
1766
1767 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1768 _cleanup_free_ char *cgroup = NULL;
1769 int r;
1770
1771 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1772 if (r < 0)
1773 return r;
1774
1775 return cg_path_get_owner_uid(cgroup, uid);
1776 }
1777
1778 int cg_path_get_slice(const char *p, char **slice) {
1779 const char *e = NULL;
1780
1781 assert(p);
1782 assert(slice);
1783
1784 /* Finds the right-most slice unit from the beginning, but
1785 * stops before we come to the first non-slice unit. */
1786
1787 for (;;) {
1788 size_t n;
1789
1790 p += strspn(p, "/");
1791
1792 n = strcspn(p, "/");
1793 if (!valid_slice_name(p, n)) {
1794
1795 if (!e) {
1796 char *s;
1797
1798 s = strdup(SPECIAL_ROOT_SLICE);
1799 if (!s)
1800 return -ENOMEM;
1801
1802 *slice = s;
1803 return 0;
1804 }
1805
1806 return cg_path_decode_unit(e, slice);
1807 }
1808
1809 e = p;
1810 p += n;
1811 }
1812 }
1813
1814 int cg_pid_get_slice(pid_t pid, char **slice) {
1815 _cleanup_free_ char *cgroup = NULL;
1816 int r;
1817
1818 assert(slice);
1819
1820 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1821 if (r < 0)
1822 return r;
1823
1824 return cg_path_get_slice(cgroup, slice);
1825 }
1826
/* Extracts the user slice from a cgroup path. Returns -ENXIO if the path does not carry a
 * user manager or session prefix; otherwise the result of cg_path_get_slice() on the rest. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Past the prefix the path looks like the one of a system slice, hence
         * the same parser is used. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1840
1841 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1842 _cleanup_free_ char *cgroup = NULL;
1843 int r;
1844
1845 assert(slice);
1846
1847 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1848 if (r < 0)
1849 return r;
1850
1851 return cg_path_get_user_slice(cgroup, slice);
1852 }
1853
1854 char *cg_escape(const char *p) {
1855 bool need_prefix = false;
1856
1857 /* This implements very minimal escaping for names to be used
1858 * as file names in the cgroup tree: any name which might
1859 * conflict with a kernel name or is prefixed with '_' is
1860 * prefixed with a '_'. That way, when reading cgroup names it
1861 * is sufficient to remove a single prefixing underscore if
1862 * there is one. */
1863
1864 /* The return value of this function (unlike cg_unescape())
1865 * needs free()! */
1866
1867 if (IN_SET(p[0], 0, '_', '.') ||
1868 streq(p, "notify_on_release") ||
1869 streq(p, "release_agent") ||
1870 streq(p, "tasks") ||
1871 startswith(p, "cgroup."))
1872 need_prefix = true;
1873 else {
1874 const char *dot;
1875
1876 dot = strrchr(p, '.');
1877 if (dot) {
1878 CGroupController c;
1879 size_t l = dot - p;
1880
1881 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1882 const char *n;
1883
1884 n = cgroup_controller_to_string(c);
1885
1886 if (l != strlen(n))
1887 continue;
1888
1889 if (memcmp(p, n, l) != 0)
1890 continue;
1891
1892 need_prefix = true;
1893 break;
1894 }
1895 }
1896 }
1897
1898 if (need_prefix)
1899 return strappend("_", p);
1900
1901 return strdup(p);
1902 }
1903
/* Reverses cg_escape(): returns a pointer past a single leading underscore, if there is one.
 *
 * Unlike the result of cg_escape(), the returned pointer points into 'p' and must not be
 * freed. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (*p == '_' ? p + 1 : p);
}
1915
1916 #define CONTROLLER_VALID \
1917 DIGITS LETTERS \
1918 "_"
1919
1920 bool cg_controller_is_valid(const char *p) {
1921 const char *t, *s;
1922
1923 if (!p)
1924 return false;
1925
1926 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1927 return true;
1928
1929 s = startswith(p, "name=");
1930 if (s)
1931 p = s;
1932
1933 if (IN_SET(*p, 0, '_'))
1934 return false;
1935
1936 for (t = p; *t; t++)
1937 if (!strchr(CONTROLLER_VALID, *t))
1938 return false;
1939
1940 if (t - p > FILENAME_MAX)
1941 return false;
1942
1943 return true;
1944 }
1945
1946 int cg_slice_to_path(const char *unit, char **ret) {
1947 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1948 const char *dash;
1949 int r;
1950
1951 assert(unit);
1952 assert(ret);
1953
1954 if (streq(unit, SPECIAL_ROOT_SLICE)) {
1955 char *x;
1956
1957 x = strdup("");
1958 if (!x)
1959 return -ENOMEM;
1960 *ret = x;
1961 return 0;
1962 }
1963
1964 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1965 return -EINVAL;
1966
1967 if (!endswith(unit, ".slice"))
1968 return -EINVAL;
1969
1970 r = unit_name_to_prefix(unit, &p);
1971 if (r < 0)
1972 return r;
1973
1974 dash = strchr(p, '-');
1975
1976 /* Don't allow initial dashes */
1977 if (dash == p)
1978 return -EINVAL;
1979
1980 while (dash) {
1981 _cleanup_free_ char *escaped = NULL;
1982 char n[dash - p + sizeof(".slice")];
1983
1984 /* Don't allow trailing or double dashes */
1985 if (IN_SET(dash[1], 0, '-'))
1986 return -EINVAL;
1987
1988 strcpy(stpncpy(n, p, dash - p), ".slice");
1989 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1990 return -EINVAL;
1991
1992 escaped = cg_escape(n);
1993 if (!escaped)
1994 return -ENOMEM;
1995
1996 if (!strextend(&s, escaped, "/", NULL))
1997 return -ENOMEM;
1998
1999 dash = strchr(dash+1, '-');
2000 }
2001
2002 e = cg_escape(unit);
2003 if (!e)
2004 return -ENOMEM;
2005
2006 if (!strextend(&s, e, NULL))
2007 return -ENOMEM;
2008
2009 *ret = s;
2010 s = NULL;
2011
2012 return 0;
2013 }
2014
2015 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2016 _cleanup_free_ char *p = NULL;
2017 int r;
2018
2019 r = cg_get_path(controller, path, attribute, &p);
2020 if (r < 0)
2021 return r;
2022
2023 return write_string_file(p, value, 0);
2024 }
2025
2026 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2027 _cleanup_free_ char *p = NULL;
2028 int r;
2029
2030 r = cg_get_path(controller, path, attribute, &p);
2031 if (r < 0)
2032 return r;
2033
2034 return read_one_line_file(p, ret);
2035 }
2036
2037 int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
2038 _cleanup_free_ char *filename = NULL, *content = NULL;
2039 char *line, *p;
2040 int i, r;
2041
2042 for (i = 0; keys[i]; i++)
2043 values[i] = NULL;
2044
2045 r = cg_get_path(controller, path, attribute, &filename);
2046 if (r < 0)
2047 return r;
2048
2049 r = read_full_file(filename, &content, NULL);
2050 if (r < 0)
2051 return r;
2052
2053 p = content;
2054 while ((line = strsep(&p, "\n"))) {
2055 char *key;
2056
2057 key = strsep(&line, " ");
2058
2059 for (i = 0; keys[i]; i++) {
2060 if (streq(key, keys[i])) {
2061 values[i] = strdup(line);
2062 break;
2063 }
2064 }
2065 }
2066
2067 for (i = 0; keys[i]; i++) {
2068 if (!values[i]) {
2069 for (i = 0; keys[i]; i++) {
2070 values[i] = mfree(values[i]);
2071 }
2072 return -ENOENT;
2073 }
2074 }
2075
2076 return 0;
2077 }
2078
2079 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2080 CGroupController c;
2081 int r;
2082
2083 /* This one will create a cgroup in our private tree, but also
2084 * duplicate it in the trees specified in mask, and remove it
2085 * in all others */
2086
2087 /* First create the cgroup in our own hierarchy. */
2088 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2089 if (r < 0)
2090 return r;
2091
2092 /* If we are in the unified hierarchy, we are done now */
2093 r = cg_all_unified();
2094 if (r < 0)
2095 return r;
2096 if (r > 0)
2097 return 0;
2098
2099 /* Otherwise, do the same in the other hierarchies */
2100 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2101 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2102 const char *n;
2103
2104 n = cgroup_controller_to_string(c);
2105
2106 if (mask & bit)
2107 (void) cg_create(n, path);
2108 else if (supported & bit)
2109 (void) cg_trim(n, path, true);
2110 }
2111
2112 return 0;
2113 }
2114
2115 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2116 CGroupController c;
2117 int r;
2118
2119 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
2120 if (r < 0)
2121 return r;
2122
2123 r = cg_all_unified();
2124 if (r < 0)
2125 return r;
2126 if (r > 0)
2127 return 0;
2128
2129 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2130 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2131 const char *p = NULL;
2132
2133 if (!(supported & bit))
2134 continue;
2135
2136 if (path_callback)
2137 p = path_callback(bit, userdata);
2138
2139 if (!p)
2140 p = path;
2141
2142 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2143 }
2144
2145 return 0;
2146 }
2147
2148 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2149 Iterator i;
2150 void *pidp;
2151 int r = 0;
2152
2153 SET_FOREACH(pidp, pids, i) {
2154 pid_t pid = PTR_TO_PID(pidp);
2155 int q;
2156
2157 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2158 if (q < 0 && r >= 0)
2159 r = q;
2160 }
2161
2162 return r;
2163 }
2164
2165 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2166 CGroupController c;
2167 int r = 0, q;
2168
2169 if (!path_equal(from, to)) {
2170 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2171 if (r < 0)
2172 return r;
2173 }
2174
2175 q = cg_all_unified();
2176 if (q < 0)
2177 return q;
2178 if (q > 0)
2179 return r;
2180
2181 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2182 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2183 const char *p = NULL;
2184
2185 if (!(supported & bit))
2186 continue;
2187
2188 if (to_callback)
2189 p = to_callback(bit, userdata);
2190
2191 if (!p)
2192 p = to;
2193
2194 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2195 }
2196
2197 return 0;
2198 }
2199
2200 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2201 CGroupController c;
2202 int r, q;
2203
2204 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2205 if (r < 0)
2206 return r;
2207
2208 q = cg_all_unified();
2209 if (q < 0)
2210 return q;
2211 if (q > 0)
2212 return r;
2213
2214 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2215 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2216
2217 if (!(supported & bit))
2218 continue;
2219
2220 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2221 }
2222
2223 return 0;
2224 }
2225
2226 int cg_mask_to_string(CGroupMask mask, char **ret) {
2227 _cleanup_free_ char *s = NULL;
2228 size_t n = 0, allocated = 0;
2229 bool space = false;
2230 CGroupController c;
2231
2232 assert(ret);
2233
2234 if (mask == 0) {
2235 *ret = NULL;
2236 return 0;
2237 }
2238
2239 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2240 const char *k;
2241 size_t l;
2242
2243 if (!(mask & CGROUP_CONTROLLER_TO_MASK(c)))
2244 continue;
2245
2246 k = cgroup_controller_to_string(c);
2247 l = strlen(k);
2248
2249 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
2250 return -ENOMEM;
2251
2252 if (space)
2253 s[n] = ' ';
2254 memcpy(s + n + space, k, l);
2255 n += space + l;
2256
2257 space = true;
2258 }
2259
2260 assert(s);
2261
2262 s[n] = 0;
2263 *ret = s;
2264 s = NULL;
2265
2266 return 0;
2267 }
2268
2269 int cg_mask_from_string(const char *value, CGroupMask *mask) {
2270 assert(mask);
2271 assert(value);
2272
2273 for (;;) {
2274 _cleanup_free_ char *n = NULL;
2275 CGroupController v;
2276 int r;
2277
2278 r = extract_first_word(&value, &n, NULL, 0);
2279 if (r < 0)
2280 return r;
2281 if (r == 0)
2282 break;
2283
2284 v = cgroup_controller_from_string(n);
2285 if (v < 0)
2286 continue;
2287
2288 *mask |= CGROUP_CONTROLLER_TO_MASK(v);
2289 }
2290 return 0;
2291 }
2292
2293 int cg_mask_supported(CGroupMask *ret) {
2294 CGroupMask mask = 0;
2295 int r;
2296
2297 /* Determines the mask of supported cgroup controllers. Only
2298 * includes controllers we can make sense of and that are
2299 * actually accessible. */
2300
2301 r = cg_all_unified();
2302 if (r < 0)
2303 return r;
2304 if (r > 0) {
2305 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2306
2307 /* In the unified hierarchy we can read the supported
2308 * and accessible controllers from a the top-level
2309 * cgroup attribute */
2310
2311 r = cg_get_root_path(&root);
2312 if (r < 0)
2313 return r;
2314
2315 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2316 if (r < 0)
2317 return r;
2318
2319 r = read_one_line_file(path, &controllers);
2320 if (r < 0)
2321 return r;
2322
2323 r = cg_mask_from_string(controllers, &mask);
2324 if (r < 0)
2325 return r;
2326
2327 /* Currently, we support the cpu, memory, io and pids
2328 * controller in the unified hierarchy, mask
2329 * everything else off. */
2330 mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2331
2332 } else {
2333 CGroupController c;
2334
2335 /* In the legacy hierarchy, we check whether which
2336 * hierarchies are mounted. */
2337
2338 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2339 const char *n;
2340
2341 n = cgroup_controller_to_string(c);
2342 if (controller_is_accessible(n) >= 0)
2343 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2344 }
2345 }
2346
2347 *ret = mask;
2348 return 0;
2349 }
2350
2351 int cg_kernel_controllers(Set **ret) {
2352 _cleanup_set_free_free_ Set *controllers = NULL;
2353 _cleanup_fclose_ FILE *f = NULL;
2354 int r;
2355
2356 assert(ret);
2357
2358 /* Determines the full list of kernel-known controllers. Might
2359 * include controllers we don't actually support, arbitrary
2360 * named hierarchies and controllers that aren't currently
2361 * accessible (because not mounted). */
2362
2363 controllers = set_new(&string_hash_ops);
2364 if (!controllers)
2365 return -ENOMEM;
2366
2367 f = fopen("/proc/cgroups", "re");
2368 if (!f) {
2369 if (errno == ENOENT) {
2370 *ret = NULL;
2371 return 0;
2372 }
2373
2374 return -errno;
2375 }
2376
2377 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
2378
2379 /* Ignore the header line */
2380 (void) read_line(f, (size_t) -1, NULL);
2381
2382 for (;;) {
2383 char *controller;
2384 int enabled = 0;
2385
2386 errno = 0;
2387 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2388
2389 if (feof(f))
2390 break;
2391
2392 if (ferror(f) && errno > 0)
2393 return -errno;
2394
2395 return -EBADMSG;
2396 }
2397
2398 if (!enabled) {
2399 free(controller);
2400 continue;
2401 }
2402
2403 if (!cg_controller_is_valid(controller)) {
2404 free(controller);
2405 return -EBADMSG;
2406 }
2407
2408 r = set_consume(controllers, controller);
2409 if (r < 0)
2410 return r;
2411 }
2412
2413 *ret = controllers;
2414 controllers = NULL;
2415
2416 return 0;
2417 }
2418
2419 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2420
2421 /* The hybrid mode was initially implemented in v232 and simply mounted cgroup v2 on /sys/fs/cgroup/systemd. This
2422 * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
2423 * /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mountnbs v2 on /sys/fs/cgroup/unified and maintains
2424 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
2425 *
2426 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep cgroup v2
2427 * process management but disable the compat dual layout, we return %true on
2428 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
2429 */
2430 static thread_local bool unified_systemd_v232;
2431
2432 static int cg_unified_update(void) {
2433
2434 struct statfs fs;
2435
2436 /* Checks if we support the unified hierarchy. Returns an
2437 * error when the cgroup hierarchies aren't mounted yet or we
2438 * have any other trouble determining if the unified hierarchy
2439 * is supported. */
2440
2441 if (unified_cache >= CGROUP_UNIFIED_NONE)
2442 return 0;
2443
2444 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2445 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\" failed: %m");
2446
2447 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2448 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
2449 unified_cache = CGROUP_UNIFIED_ALL;
2450 } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2451 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
2452 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2453 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2454 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2455 unified_systemd_v232 = false;
2456 } else {
2457 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
2458 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
2459
2460 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2461 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2462 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2463 unified_systemd_v232 = true;
2464 } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2465 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2466 unified_cache = CGROUP_UNIFIED_NONE;
2467 } else {
2468 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
2469 (unsigned long long) fs.f_type);
2470 unified_cache = CGROUP_UNIFIED_NONE;
2471 }
2472 }
2473 } else {
2474 log_debug("Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2475 (unsigned long long) fs.f_type);
2476 return -ENOMEDIUM;
2477 }
2478
2479 return 0;
2480 }
2481
2482 int cg_unified_controller(const char *controller) {
2483 int r;
2484
2485 r = cg_unified_update();
2486 if (r < 0)
2487 return r;
2488
2489 if (unified_cache == CGROUP_UNIFIED_NONE)
2490 return false;
2491
2492 if (unified_cache >= CGROUP_UNIFIED_ALL)
2493 return true;
2494
2495 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
2496 }
2497
2498 int cg_all_unified(void) {
2499 int r;
2500
2501 r = cg_unified_update();
2502 if (r < 0)
2503 return r;
2504
2505 return unified_cache >= CGROUP_UNIFIED_ALL;
2506 }
2507
2508 int cg_hybrid_unified(void) {
2509 int r;
2510
2511 r = cg_unified_update();
2512 if (r < 0)
2513 return r;
2514
2515 return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2516 }
2517
2518 int cg_unified_flush(void) {
2519 unified_cache = CGROUP_UNIFIED_UNKNOWN;
2520
2521 return cg_unified_update();
2522 }
2523
2524 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2525 _cleanup_fclose_ FILE *f = NULL;
2526 _cleanup_free_ char *fs = NULL;
2527 CGroupController c;
2528 int r;
2529
2530 assert(p);
2531
2532 if (supported == 0)
2533 return 0;
2534
2535 r = cg_all_unified();
2536 if (r < 0)
2537 return r;
2538 if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */
2539 return 0;
2540
2541 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2542 if (r < 0)
2543 return r;
2544
2545 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2546 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2547 const char *n;
2548
2549 if (!(supported & bit))
2550 continue;
2551
2552 n = cgroup_controller_to_string(c);
2553 {
2554 char s[1 + strlen(n) + 1];
2555
2556 s[0] = mask & bit ? '+' : '-';
2557 strcpy(s + 1, n);
2558
2559 if (!f) {
2560 f = fopen(fs, "we");
2561 if (!f) {
2562 log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
2563 break;
2564 }
2565 }
2566
2567 r = write_string_stream(f, s, 0);
2568 if (r < 0)
2569 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2570 }
2571 }
2572
2573 return 0;
2574 }
2575
2576 bool cg_is_unified_wanted(void) {
2577 static thread_local int wanted = -1;
2578 int r;
2579 bool b;
2580 const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
2581
2582 /* If we have a cached value, return that. */
2583 if (wanted >= 0)
2584 return wanted;
2585
2586 /* If the hierarchy is already mounted, then follow whatever
2587 * was chosen for it. */
2588 if (cg_unified_flush() >= 0)
2589 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
2590
2591 /* Otherwise, let's see what the kernel command line has to say.
2592 * Since checking is expensive, cache a non-error result. */
2593 r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
2594
2595 return (wanted = r > 0 ? b : is_default);
2596 }
2597
2598 bool cg_is_legacy_wanted(void) {
2599 static thread_local int wanted = -1;
2600
2601 /* If we have a cached value, return that. */
2602 if (wanted >= 0)
2603 return wanted;
2604
2605 /* Check if we have cgroups2 already mounted. */
2606 if (cg_unified_flush() >= 0 &&
2607 unified_cache == CGROUP_UNIFIED_ALL)
2608 return (wanted = false);
2609
2610 /* Otherwise, assume that at least partial legacy is wanted,
2611 * since cgroups2 should already be mounted at this point. */
2612 return (wanted = true);
2613 }
2614
2615 bool cg_is_hybrid_wanted(void) {
2616 static thread_local int wanted = -1;
2617 int r;
2618 bool b;
2619 const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2620 /* We default to true if the default is "hybrid", obviously,
2621 * but also when the default is "unified", because if we get
2622 * called, it means that unified hierarchy was not mounted. */
2623
2624 /* If we have a cached value, return that. */
2625 if (wanted >= 0)
2626 return wanted;
2627
2628 /* If the hierarchy is already mounted, then follow whatever
2629 * was chosen for it. */
2630 if (cg_unified_flush() >= 0 &&
2631 unified_cache == CGROUP_UNIFIED_ALL)
2632 return (wanted = false);
2633
2634 /* Otherwise, let's see what the kernel command line has to say.
2635 * Since checking is expensive, cache a non-error result. */
2636 r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
2637
2638 /* The meaning of the kernel option is reversed wrt. to the return value
2639 * of this function, hence the negation. */
2640 return (wanted = r > 0 ? !b : is_default);
2641 }
2642
2643 int cg_weight_parse(const char *s, uint64_t *ret) {
2644 uint64_t u;
2645 int r;
2646
2647 if (isempty(s)) {
2648 *ret = CGROUP_WEIGHT_INVALID;
2649 return 0;
2650 }
2651
2652 r = safe_atou64(s, &u);
2653 if (r < 0)
2654 return r;
2655
2656 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2657 return -ERANGE;
2658
2659 *ret = u;
2660 return 0;
2661 }
2662
2663 const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2664 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2665 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
2666 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2667 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
2668 };
2669
2670 static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2671 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2672 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
2673 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2674 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
2675 };
2676
2677 DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2678
2679 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2680 uint64_t u;
2681 int r;
2682
2683 if (isempty(s)) {
2684 *ret = CGROUP_CPU_SHARES_INVALID;
2685 return 0;
2686 }
2687
2688 r = safe_atou64(s, &u);
2689 if (r < 0)
2690 return r;
2691
2692 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2693 return -ERANGE;
2694
2695 *ret = u;
2696 return 0;
2697 }
2698
2699 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2700 uint64_t u;
2701 int r;
2702
2703 if (isempty(s)) {
2704 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2705 return 0;
2706 }
2707
2708 r = safe_atou64(s, &u);
2709 if (r < 0)
2710 return r;
2711
2712 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2713 return -ERANGE;
2714
2715 *ret = u;
2716 return 0;
2717 }
2718
2719 bool is_cgroup_fs(const struct statfs *s) {
2720 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2721 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2722 }
2723
/* Returns true if the file descriptor refers to a cgroup or cgroup2 file system.
 *
 * Returns false if fstatfs() fails. As the function is declared bool, the former
 * "return -errno" was converted to true, i.e. every failure was reported as "is a cgroup
 * file system". */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2732
2733 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2734 [CGROUP_CONTROLLER_CPU] = "cpu",
2735 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2736 [CGROUP_CONTROLLER_IO] = "io",
2737 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2738 [CGROUP_CONTROLLER_MEMORY] = "memory",
2739 [CGROUP_CONTROLLER_DEVICES] = "devices",
2740 [CGROUP_CONTROLLER_PIDS] = "pids",
2741 };
2742
2743 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);