]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/cgroup-util.c
core: introduce CGroupIOLimitType enums
[thirdparty/systemd.git] / src / basic / cgroup-util.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <dirent.h>
21 #include <errno.h>
22 #include <ftw.h>
23 #include <limits.h>
24 #include <signal.h>
25 #include <stddef.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/stat.h>
29 #include <sys/statfs.h>
30 #include <sys/types.h>
31 #include <unistd.h>
32
33 #include "alloc-util.h"
34 #include "cgroup-util.h"
35 #include "def.h"
36 #include "dirent-util.h"
37 #include "extract-word.h"
38 #include "fd-util.h"
39 #include "fileio.h"
40 #include "formats-util.h"
41 #include "fs-util.h"
42 #include "log.h"
43 #include "login-util.h"
44 #include "macro.h"
45 #include "missing.h"
46 #include "mkdir.h"
47 #include "parse-util.h"
48 #include "path-util.h"
49 #include "proc-cmdline.h"
50 #include "process-util.h"
51 #include "set.h"
52 #include "special.h"
53 #include "stat-util.h"
54 #include "stdio-util.h"
55 #include "string-table.h"
56 #include "string-util.h"
57 #include "unit-name.h"
58 #include "user-util.h"
59
60 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
61 _cleanup_free_ char *fs = NULL;
62 FILE *f;
63 int r;
64
65 assert(_f);
66
67 r = cg_get_path(controller, path, "cgroup.procs", &fs);
68 if (r < 0)
69 return r;
70
71 f = fopen(fs, "re");
72 if (!f)
73 return -errno;
74
75 *_f = f;
76 return 0;
77 }
78
79 int cg_read_pid(FILE *f, pid_t *_pid) {
80 unsigned long ul;
81
82 /* Note that the cgroup.procs might contain duplicates! See
83 * cgroups.txt for details. */
84
85 assert(f);
86 assert(_pid);
87
88 errno = 0;
89 if (fscanf(f, "%lu", &ul) != 1) {
90
91 if (feof(f))
92 return 0;
93
94 return errno > 0 ? -errno : -EIO;
95 }
96
97 if (ul <= 0)
98 return -EIO;
99
100 *_pid = (pid_t) ul;
101 return 1;
102 }
103
104 int cg_read_event(const char *controller, const char *path, const char *event,
105 char **val)
106 {
107 _cleanup_free_ char *events = NULL, *content = NULL;
108 char *p, *line;
109 int r;
110
111 r = cg_get_path(controller, path, "cgroup.events", &events);
112 if (r < 0)
113 return r;
114
115 r = read_full_file(events, &content, NULL);
116 if (r < 0)
117 return r;
118
119 p = content;
120 while ((line = strsep(&p, "\n"))) {
121 char *key;
122
123 key = strsep(&line, " ");
124 if (!key || !line)
125 return -EINVAL;
126
127 if (strcmp(key, event))
128 continue;
129
130 *val = strdup(line);
131 return 0;
132 }
133
134 return -ENOENT;
135 }
136
137 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
138 _cleanup_free_ char *fs = NULL;
139 int r;
140 DIR *d;
141
142 assert(_d);
143
144 /* This is not recursive! */
145
146 r = cg_get_path(controller, path, NULL, &fs);
147 if (r < 0)
148 return r;
149
150 d = opendir(fs);
151 if (!d)
152 return -errno;
153
154 *_d = d;
155 return 0;
156 }
157
158 int cg_read_subgroup(DIR *d, char **fn) {
159 struct dirent *de;
160
161 assert(d);
162 assert(fn);
163
164 FOREACH_DIRENT_ALL(de, d, return -errno) {
165 char *b;
166
167 if (de->d_type != DT_DIR)
168 continue;
169
170 if (streq(de->d_name, ".") ||
171 streq(de->d_name, ".."))
172 continue;
173
174 b = strdup(de->d_name);
175 if (!b)
176 return -ENOMEM;
177
178 *fn = b;
179 return 1;
180 }
181
182 return 0;
183 }
184
185 int cg_rmdir(const char *controller, const char *path) {
186 _cleanup_free_ char *p = NULL;
187 int r;
188
189 r = cg_get_path(controller, path, NULL, &p);
190 if (r < 0)
191 return r;
192
193 r = rmdir(p);
194 if (r < 0 && errno != ENOENT)
195 return -errno;
196
197 return 0;
198 }
199
200 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
201 _cleanup_set_free_ Set *allocated_set = NULL;
202 bool done = false;
203 int r, ret = 0;
204 pid_t my_pid;
205
206 assert(sig >= 0);
207
208 /* This goes through the tasks list and kills them all. This
209 * is repeated until no further processes are added to the
210 * tasks list, to properly handle forking processes */
211
212 if (!s) {
213 s = allocated_set = set_new(NULL);
214 if (!s)
215 return -ENOMEM;
216 }
217
218 my_pid = getpid();
219
220 do {
221 _cleanup_fclose_ FILE *f = NULL;
222 pid_t pid = 0;
223 done = true;
224
225 r = cg_enumerate_processes(controller, path, &f);
226 if (r < 0) {
227 if (ret >= 0 && r != -ENOENT)
228 return r;
229
230 return ret;
231 }
232
233 while ((r = cg_read_pid(f, &pid)) > 0) {
234
235 if (ignore_self && pid == my_pid)
236 continue;
237
238 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
239 continue;
240
241 /* If we haven't killed this process yet, kill
242 * it */
243 if (kill(pid, sig) < 0) {
244 if (ret >= 0 && errno != ESRCH)
245 ret = -errno;
246 } else {
247 if (sigcont && sig != SIGKILL)
248 (void) kill(pid, SIGCONT);
249
250 if (ret == 0)
251 ret = 1;
252 }
253
254 done = false;
255
256 r = set_put(s, PID_TO_PTR(pid));
257 if (r < 0) {
258 if (ret >= 0)
259 return r;
260
261 return ret;
262 }
263 }
264
265 if (r < 0) {
266 if (ret >= 0)
267 return r;
268
269 return ret;
270 }
271
272 /* To avoid racing against processes which fork
273 * quicker than we can kill them we repeat this until
274 * no new pids need to be killed. */
275
276 } while (!done);
277
278 return ret;
279 }
280
281 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
282 _cleanup_set_free_ Set *allocated_set = NULL;
283 _cleanup_closedir_ DIR *d = NULL;
284 int r, ret;
285 char *fn;
286
287 assert(path);
288 assert(sig >= 0);
289
290 if (!s) {
291 s = allocated_set = set_new(NULL);
292 if (!s)
293 return -ENOMEM;
294 }
295
296 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
297
298 r = cg_enumerate_subgroups(controller, path, &d);
299 if (r < 0) {
300 if (ret >= 0 && r != -ENOENT)
301 return r;
302
303 return ret;
304 }
305
306 while ((r = cg_read_subgroup(d, &fn)) > 0) {
307 _cleanup_free_ char *p = NULL;
308
309 p = strjoin(path, "/", fn, NULL);
310 free(fn);
311 if (!p)
312 return -ENOMEM;
313
314 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
315 if (r != 0 && ret >= 0)
316 ret = r;
317 }
318
319 if (ret >= 0 && r < 0)
320 ret = r;
321
322 if (rem) {
323 r = cg_rmdir(controller, path);
324 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
325 return r;
326 }
327
328 return ret;
329 }
330
331 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
332 bool done = false;
333 _cleanup_set_free_ Set *s = NULL;
334 int r, ret = 0;
335 pid_t my_pid;
336
337 assert(cfrom);
338 assert(pfrom);
339 assert(cto);
340 assert(pto);
341
342 s = set_new(NULL);
343 if (!s)
344 return -ENOMEM;
345
346 my_pid = getpid();
347
348 do {
349 _cleanup_fclose_ FILE *f = NULL;
350 pid_t pid = 0;
351 done = true;
352
353 r = cg_enumerate_processes(cfrom, pfrom, &f);
354 if (r < 0) {
355 if (ret >= 0 && r != -ENOENT)
356 return r;
357
358 return ret;
359 }
360
361 while ((r = cg_read_pid(f, &pid)) > 0) {
362
363 /* This might do weird stuff if we aren't a
364 * single-threaded program. However, we
365 * luckily know we are not */
366 if (ignore_self && pid == my_pid)
367 continue;
368
369 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
370 continue;
371
372 /* Ignore kernel threads. Since they can only
373 * exist in the root cgroup, we only check for
374 * them there. */
375 if (cfrom &&
376 (isempty(pfrom) || path_equal(pfrom, "/")) &&
377 is_kernel_thread(pid) > 0)
378 continue;
379
380 r = cg_attach(cto, pto, pid);
381 if (r < 0) {
382 if (ret >= 0 && r != -ESRCH)
383 ret = r;
384 } else if (ret == 0)
385 ret = 1;
386
387 done = false;
388
389 r = set_put(s, PID_TO_PTR(pid));
390 if (r < 0) {
391 if (ret >= 0)
392 return r;
393
394 return ret;
395 }
396 }
397
398 if (r < 0) {
399 if (ret >= 0)
400 return r;
401
402 return ret;
403 }
404 } while (!done);
405
406 return ret;
407 }
408
409 int cg_migrate_recursive(
410 const char *cfrom,
411 const char *pfrom,
412 const char *cto,
413 const char *pto,
414 bool ignore_self,
415 bool rem) {
416
417 _cleanup_closedir_ DIR *d = NULL;
418 int r, ret = 0;
419 char *fn;
420
421 assert(cfrom);
422 assert(pfrom);
423 assert(cto);
424 assert(pto);
425
426 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
427
428 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
429 if (r < 0) {
430 if (ret >= 0 && r != -ENOENT)
431 return r;
432
433 return ret;
434 }
435
436 while ((r = cg_read_subgroup(d, &fn)) > 0) {
437 _cleanup_free_ char *p = NULL;
438
439 p = strjoin(pfrom, "/", fn, NULL);
440 free(fn);
441 if (!p)
442 return -ENOMEM;
443
444 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
445 if (r != 0 && ret >= 0)
446 ret = r;
447 }
448
449 if (r < 0 && ret >= 0)
450 ret = r;
451
452 if (rem) {
453 r = cg_rmdir(cfrom, pfrom);
454 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
455 return r;
456 }
457
458 return ret;
459 }
460
461 int cg_migrate_recursive_fallback(
462 const char *cfrom,
463 const char *pfrom,
464 const char *cto,
465 const char *pto,
466 bool ignore_self,
467 bool rem) {
468
469 int r;
470
471 assert(cfrom);
472 assert(pfrom);
473 assert(cto);
474 assert(pto);
475
476 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
477 if (r < 0) {
478 char prefix[strlen(pto) + 1];
479
480 /* This didn't work? Then let's try all prefixes of the destination */
481
482 PATH_FOREACH_PREFIX(prefix, pto) {
483 int q;
484
485 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
486 if (q >= 0)
487 return q;
488 }
489 }
490
491 return r;
492 }
493
/* Maps a controller name to the directory name used for it below
 * /sys/fs/cgroup/: a leading "name=" (as used for named hierarchies) is cut
 * off, anything else is returned unchanged. The result points into the
 * string passed in. */
static const char *controller_to_dirname(const char *controller) {
        const char *stripped;

        assert(controller);

        stripped = startswith(controller, "name=");

        return stripped ? stripped : controller;
}
510
/* Builds "/sys/fs/cgroup/<dirname>[/<path>][/<suffix>]", where <dirname> is
 * derived from controller and empty path/suffix components are left out.
 * Stores the newly allocated string in *fs and returns 0, or -ENOMEM. */
static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
        const char *dn;
        char *joined;

        assert(fs);
        assert(controller);

        dn = controller_to_dirname(controller);

        if (isempty(path))
                joined = isempty(suffix) ?
                        strappend("/sys/fs/cgroup/", dn) :
                        strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
        else if (isempty(suffix))
                joined = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
        else
                joined = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
534
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]", leaving out empty components.
 * Stores the newly allocated string in *fs and returns 0, or -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        char *joined;

        assert(fs);

        if (isempty(path))
                joined = isempty(suffix) ?
                        strdup("/sys/fs/cgroup") :
                        strappend("/sys/fs/cgroup/", suffix);
        else if (isempty(suffix))
                joined = strappend("/sys/fs/cgroup/", path);
        else
                joined = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
554
/* Computes the file system path for a cgroup (and optional attribute suffix).
 *
 * Without a controller, path and suffix are simply joined, with no
 * /sys/fs/cgroup prefix; at least one of the two must then be given. With a
 * controller, the name is validated and the path is built for the unified or
 * the legacy layout, depending on what cg_unified() reports.
 *
 * Stores a newly allocated string in *fs and returns 0, or a negative
 * errno-style value. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int unified, r;

        assert(fs);

        if (!controller) {
                char *joined;

                /* No controller: hand back the path *below* the controllers. */

                if (!path && !suffix)
                        return -EINVAL;

                if (path && suffix)
                        joined = strjoin(path, "/", suffix, NULL);
                else
                        joined = strdup(path ? path : suffix);
                if (!joined)
                        return -ENOMEM;

                *fs = path_kill_slashes(joined);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        unified = cg_unified();
        if (unified < 0)
                return unified;

        r = unified > 0 ?
                join_path_unified(path, suffix, fs) :
                join_path_legacy(controller, path, suffix, fs);
        if (r < 0)
                return r;

        path_kill_slashes(*fs);
        return 0;
}
599
600 static int controller_is_accessible(const char *controller) {
601 int unified;
602
603 assert(controller);
604
605 /* Checks whether a specific controller is accessible,
606 * i.e. its hierarchy mounted. In the unified hierarchy all
607 * controllers are considered accessible, except for the named
608 * hierarchies */
609
610 if (!cg_controller_is_valid(controller))
611 return -EINVAL;
612
613 unified = cg_unified();
614 if (unified < 0)
615 return unified;
616 if (unified > 0) {
617 /* We don't support named hierarchies if we are using
618 * the unified hierarchy. */
619
620 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
621 return 0;
622
623 if (startswith(controller, "name="))
624 return -EOPNOTSUPP;
625
626 } else {
627 const char *cc, *dn;
628
629 dn = controller_to_dirname(controller);
630 cc = strjoina("/sys/fs/cgroup/", dn);
631
632 if (laccess(cc, F_OK) < 0)
633 return -errno;
634 }
635
636 return 0;
637 }
638
/* Same as cg_get_path(), but first verifies via controller_is_accessible()
 * that the controller can actually be used, and fails with its error if
 * not. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
652
653 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
654 assert(path);
655 assert(sb);
656 assert(ftwbuf);
657
658 if (typeflag != FTW_DP)
659 return 0;
660
661 if (ftwbuf->level < 1)
662 return 0;
663
664 (void) rmdir(path);
665 return 0;
666 }
667
668 int cg_trim(const char *controller, const char *path, bool delete_root) {
669 _cleanup_free_ char *fs = NULL;
670 int r = 0;
671
672 assert(path);
673
674 r = cg_get_path(controller, path, NULL, &fs);
675 if (r < 0)
676 return r;
677
678 errno = 0;
679 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
680 if (errno == ENOENT)
681 r = 0;
682 else if (errno > 0)
683 r = -errno;
684 else
685 r = -EIO;
686 }
687
688 if (delete_root) {
689 if (rmdir(fs) < 0 && errno != ENOENT)
690 return -errno;
691 }
692
693 return r;
694 }
695
696 int cg_create(const char *controller, const char *path) {
697 _cleanup_free_ char *fs = NULL;
698 int r;
699
700 r = cg_get_path_and_check(controller, path, NULL, &fs);
701 if (r < 0)
702 return r;
703
704 r = mkdir_parents(fs, 0755);
705 if (r < 0)
706 return r;
707
708 if (mkdir(fs, 0755) < 0) {
709
710 if (errno == EEXIST)
711 return 0;
712
713 return -errno;
714 }
715
716 return 1;
717 }
718
/* Creates the cgroup and attaches pid to it. Returns the result of
 * cg_create() (1 if newly created, 0 if it existed) or a negative
 * errno-style value. If attaching fails, the cgroup is not removed again. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);
        if (attached < 0)
                return attached;

        return created;
}
735
736 int cg_attach(const char *controller, const char *path, pid_t pid) {
737 _cleanup_free_ char *fs = NULL;
738 char c[DECIMAL_STR_MAX(pid_t) + 2];
739 int r;
740
741 assert(path);
742 assert(pid >= 0);
743
744 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
745 if (r < 0)
746 return r;
747
748 if (pid == 0)
749 pid = getpid();
750
751 xsprintf(c, PID_FMT "\n", pid);
752
753 return write_string_file(fs, c, 0);
754 }
755
756 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
757 int r;
758
759 assert(controller);
760 assert(path);
761 assert(pid >= 0);
762
763 r = cg_attach(controller, path, pid);
764 if (r < 0) {
765 char prefix[strlen(path) + 1];
766
767 /* This didn't work? Then let's try all prefixes of
768 * the destination */
769
770 PATH_FOREACH_PREFIX(prefix, path) {
771 int q;
772
773 q = cg_attach(controller, prefix, pid);
774 if (q >= 0)
775 return q;
776 }
777 }
778
779 return r;
780 }
781
782 int cg_set_group_access(
783 const char *controller,
784 const char *path,
785 mode_t mode,
786 uid_t uid,
787 gid_t gid) {
788
789 _cleanup_free_ char *fs = NULL;
790 int r;
791
792 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
793 return 0;
794
795 if (mode != MODE_INVALID)
796 mode &= 0777;
797
798 r = cg_get_path(controller, path, NULL, &fs);
799 if (r < 0)
800 return r;
801
802 return chmod_and_chown(fs, mode, uid, gid);
803 }
804
805 int cg_set_task_access(
806 const char *controller,
807 const char *path,
808 mode_t mode,
809 uid_t uid,
810 gid_t gid) {
811
812 _cleanup_free_ char *fs = NULL, *procs = NULL;
813 int r, unified;
814
815 assert(path);
816
817 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
818 return 0;
819
820 if (mode != MODE_INVALID)
821 mode &= 0666;
822
823 r = cg_get_path(controller, path, "cgroup.procs", &fs);
824 if (r < 0)
825 return r;
826
827 r = chmod_and_chown(fs, mode, uid, gid);
828 if (r < 0)
829 return r;
830
831 unified = cg_unified();
832 if (unified < 0)
833 return unified;
834 if (unified)
835 return 0;
836
837 /* Compatibility, Always keep values for "tasks" in sync with
838 * "cgroup.procs" */
839 if (cg_get_path(controller, path, "tasks", &procs) >= 0)
840 (void) chmod_and_chown(procs, mode, uid, gid);
841
842 return 0;
843 }
844
845 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
846 _cleanup_fclose_ FILE *f = NULL;
847 char line[LINE_MAX];
848 const char *fs;
849 size_t cs = 0;
850 int unified;
851
852 assert(path);
853 assert(pid >= 0);
854
855 unified = cg_unified();
856 if (unified < 0)
857 return unified;
858 if (unified == 0) {
859 if (controller) {
860 if (!cg_controller_is_valid(controller))
861 return -EINVAL;
862 } else
863 controller = SYSTEMD_CGROUP_CONTROLLER;
864
865 cs = strlen(controller);
866 }
867
868 fs = procfs_file_alloca(pid, "cgroup");
869 f = fopen(fs, "re");
870 if (!f)
871 return errno == ENOENT ? -ESRCH : -errno;
872
873 FOREACH_LINE(line, f, return -errno) {
874 char *e, *p;
875
876 truncate_nl(line);
877
878 if (unified) {
879 e = startswith(line, "0:");
880 if (!e)
881 continue;
882
883 e = strchr(e, ':');
884 if (!e)
885 continue;
886 } else {
887 char *l;
888 size_t k;
889 const char *word, *state;
890 bool found = false;
891
892 l = strchr(line, ':');
893 if (!l)
894 continue;
895
896 l++;
897 e = strchr(l, ':');
898 if (!e)
899 continue;
900
901 *e = 0;
902 FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
903 if (k == cs && memcmp(word, controller, cs) == 0) {
904 found = true;
905 break;
906 }
907 }
908
909 if (!found)
910 continue;
911 }
912
913 p = strdup(e + 1);
914 if (!p)
915 return -ENOMEM;
916
917 *path = p;
918 return 0;
919 }
920
921 return -ENODATA;
922 }
923
924 int cg_install_release_agent(const char *controller, const char *agent) {
925 _cleanup_free_ char *fs = NULL, *contents = NULL;
926 const char *sc;
927 int r, unified;
928
929 assert(agent);
930
931 unified = cg_unified();
932 if (unified < 0)
933 return unified;
934 if (unified) /* doesn't apply to unified hierarchy */
935 return -EOPNOTSUPP;
936
937 r = cg_get_path(controller, NULL, "release_agent", &fs);
938 if (r < 0)
939 return r;
940
941 r = read_one_line_file(fs, &contents);
942 if (r < 0)
943 return r;
944
945 sc = strstrip(contents);
946 if (isempty(sc)) {
947 r = write_string_file(fs, agent, 0);
948 if (r < 0)
949 return r;
950 } else if (!path_equal(sc, agent))
951 return -EEXIST;
952
953 fs = mfree(fs);
954 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
955 if (r < 0)
956 return r;
957
958 contents = mfree(contents);
959 r = read_one_line_file(fs, &contents);
960 if (r < 0)
961 return r;
962
963 sc = strstrip(contents);
964 if (streq(sc, "0")) {
965 r = write_string_file(fs, "1", 0);
966 if (r < 0)
967 return r;
968
969 return 1;
970 }
971
972 if (!streq(sc, "1"))
973 return -EIO;
974
975 return 0;
976 }
977
978 int cg_uninstall_release_agent(const char *controller) {
979 _cleanup_free_ char *fs = NULL;
980 int r, unified;
981
982 unified = cg_unified();
983 if (unified < 0)
984 return unified;
985 if (unified) /* Doesn't apply to unified hierarchy */
986 return -EOPNOTSUPP;
987
988 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
989 if (r < 0)
990 return r;
991
992 r = write_string_file(fs, "0", 0);
993 if (r < 0)
994 return r;
995
996 fs = mfree(fs);
997
998 r = cg_get_path(controller, NULL, "release_agent", &fs);
999 if (r < 0)
1000 return r;
1001
1002 r = write_string_file(fs, "", 0);
1003 if (r < 0)
1004 return r;
1005
1006 return 0;
1007 }
1008
1009 int cg_is_empty(const char *controller, const char *path) {
1010 _cleanup_fclose_ FILE *f = NULL;
1011 pid_t pid;
1012 int r;
1013
1014 assert(path);
1015
1016 r = cg_enumerate_processes(controller, path, &f);
1017 if (r == -ENOENT)
1018 return 1;
1019 if (r < 0)
1020 return r;
1021
1022 r = cg_read_pid(f, &pid);
1023 if (r < 0)
1024 return r;
1025
1026 return r == 0;
1027 }
1028
1029 int cg_is_empty_recursive(const char *controller, const char *path) {
1030 int unified, r;
1031
1032 assert(path);
1033
1034 /* The root cgroup is always populated */
1035 if (controller && (isempty(path) || path_equal(path, "/")))
1036 return false;
1037
1038 unified = cg_unified();
1039 if (unified < 0)
1040 return unified;
1041
1042 if (unified > 0) {
1043 _cleanup_free_ char *t = NULL;
1044
1045 /* On the unified hierarchy we can check empty state
1046 * via the "populated" attribute of "cgroup.events". */
1047
1048 r = cg_read_event(controller, path, "populated", &t);
1049 if (r < 0)
1050 return r;
1051
1052 return streq(t, "0");
1053 } else {
1054 _cleanup_closedir_ DIR *d = NULL;
1055 char *fn;
1056
1057 r = cg_is_empty(controller, path);
1058 if (r <= 0)
1059 return r;
1060
1061 r = cg_enumerate_subgroups(controller, path, &d);
1062 if (r == -ENOENT)
1063 return 1;
1064 if (r < 0)
1065 return r;
1066
1067 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1068 _cleanup_free_ char *p = NULL;
1069
1070 p = strjoin(path, "/", fn, NULL);
1071 free(fn);
1072 if (!p)
1073 return -ENOMEM;
1074
1075 r = cg_is_empty_recursive(controller, p);
1076 if (r <= 0)
1077 return r;
1078 }
1079 if (r < 0)
1080 return r;
1081
1082 return true;
1083 }
1084 }
1085
/* Splits a cgroup specification into controller and path. Accepted forms:
 *
 *   "/some/path"       -> no controller (*controller = NULL), path set
 *   "controller"       -> controller set, no path (*path = NULL)
 *   "controller:/path" -> both set; an empty part after the colon yields a
 *                         NULL path
 *
 * Either output pointer may be NULL if the caller is not interested. Strings
 * stored are newly allocated. Returns 0, -EINVAL for an invalid controller
 * name or path, or -ENOMEM. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *c = NULL, *p = NULL;
        const char *colon;
        int r;

        assert(spec);

        if (*spec == '/') {
                /* Path only */
                if (!path_is_safe(spec))
                        return -EINVAL;

                if (path) {
                        p = strdup(spec);
                        if (!p)
                                return -ENOMEM;

                        *path = path_kill_slashes(p);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        colon = strchr(spec, ':');
        if (!colon) {
                /* Controller only */
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        c = strdup(spec);
                        if (!c)
                                return -ENOMEM;

                        *controller = c;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Controller and path */
        c = strndup(spec, colon - spec);
        if (!c)
                return -ENOMEM;

        if (!cg_controller_is_valid(c)) {
                r = -EINVAL;
                goto fail;
        }

        if (!isempty(colon + 1)) {
                p = strdup(colon + 1);
                if (!p) {
                        r = -ENOMEM;
                        goto fail;
                }

                if (!path_is_safe(p) || !path_is_absolute(p)) {
                        r = -EINVAL;
                        goto fail;
                }

                path_kill_slashes(p);
        }

        if (controller)
                *controller = c;
        else
                free(c);

        if (path)
                *path = p;
        else
                free(p);

        return 0;

fail:
        free(c);
        free(p);
        return r;
}
1168
1169 int cg_mangle_path(const char *path, char **result) {
1170 _cleanup_free_ char *c = NULL, *p = NULL;
1171 char *t;
1172 int r;
1173
1174 assert(path);
1175 assert(result);
1176
1177 /* First, check if it already is a filesystem path */
1178 if (path_startswith(path, "/sys/fs/cgroup")) {
1179
1180 t = strdup(path);
1181 if (!t)
1182 return -ENOMEM;
1183
1184 *result = path_kill_slashes(t);
1185 return 0;
1186 }
1187
1188 /* Otherwise, treat it as cg spec */
1189 r = cg_split_spec(path, &c, &p);
1190 if (r < 0)
1191 return r;
1192
1193 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1194 }
1195
1196 int cg_get_root_path(char **path) {
1197 char *p, *e;
1198 int r;
1199
1200 assert(path);
1201
1202 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1203 if (r < 0)
1204 return r;
1205
1206 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1207 if (!e)
1208 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1209 if (!e)
1210 e = endswith(p, "/system"); /* even more legacy */
1211 if (e)
1212 *e = 0;
1213
1214 *path = p;
1215 return 0;
1216 }
1217
1218 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1219 _cleanup_free_ char *rt = NULL;
1220 char *p;
1221 int r;
1222
1223 assert(cgroup);
1224 assert(shifted);
1225
1226 if (!root) {
1227 /* If the root was specified let's use that, otherwise
1228 * let's determine it from PID 1 */
1229
1230 r = cg_get_root_path(&rt);
1231 if (r < 0)
1232 return r;
1233
1234 root = rt;
1235 }
1236
1237 p = path_startswith(cgroup, root);
1238 if (p && p > cgroup)
1239 *shifted = p - 1;
1240 else
1241 *shifted = cgroup;
1242
1243 return 0;
1244 }
1245
1246 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1247 _cleanup_free_ char *raw = NULL;
1248 const char *c;
1249 int r;
1250
1251 assert(pid >= 0);
1252 assert(cgroup);
1253
1254 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1255 if (r < 0)
1256 return r;
1257
1258 r = cg_shift_path(raw, root, &c);
1259 if (r < 0)
1260 return r;
1261
1262 if (c == raw) {
1263 *cgroup = raw;
1264 raw = NULL;
1265 } else {
1266 char *n;
1267
1268 n = strdup(c);
1269 if (!n)
1270 return -ENOMEM;
1271
1272 *cgroup = n;
1273 }
1274
1275 return 0;
1276 }
1277
1278 int cg_path_decode_unit(const char *cgroup, char **unit) {
1279 char *c, *s;
1280 size_t n;
1281
1282 assert(cgroup);
1283 assert(unit);
1284
1285 n = strcspn(cgroup, "/");
1286 if (n < 3)
1287 return -ENXIO;
1288
1289 c = strndupa(cgroup, n);
1290 c = cg_unescape(c);
1291
1292 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1293 return -ENXIO;
1294
1295 s = strdup(c);
1296 if (!s)
1297 return -ENOMEM;
1298
1299 *unit = s;
1300 return 0;
1301 }
1302
1303 static bool valid_slice_name(const char *p, size_t n) {
1304
1305 if (!p)
1306 return false;
1307
1308 if (n < strlen("x.slice"))
1309 return false;
1310
1311 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1312 char buf[n+1], *c;
1313
1314 memcpy(buf, p, n);
1315 buf[n] = 0;
1316
1317 c = cg_unescape(buf);
1318
1319 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1320 }
1321
1322 return false;
1323 }
1324
/* Skips over all leading slice components of a cgroup path. Returns a
 * pointer to the first component (leading slashes skipped) that is not a
 * valid slice name. */
static const char *skip_slices(const char *p) {
        size_t len;

        assert(p);

        p += strspn(p, "/");
        len = strcspn(p, "/");

        while (valid_slice_name(p, len)) {
                p += len;
                p += strspn(p, "/");
                len = strcspn(p, "/");
        }

        return p;
}
1342
/* Extracts the unit name from a cgroup path: leading slices are skipped and
 * the next component is decoded. Stores a newly allocated string in *ret and
 * returns 0. Returns -ENXIO if that component is not a valid unit name or
 * still ends in ".slice", or another negative errno-style value. */
int cg_path_get_unit(const char *path, char **ret) {
        char *unit;
        int r;

        assert(path);
        assert(ret);

        r = cg_path_decode_unit(skip_slices(path), &unit);
        if (r < 0)
                return r;

        /* Slices were skipped above, so one showing up here is not accepted. */
        if (endswith(unit, ".slice")) {
                free(unit);
                return -ENXIO;
        }

        *ret = unit;
        return 0;
}
1366
1367 int cg_pid_get_unit(pid_t pid, char **unit) {
1368 _cleanup_free_ char *cgroup = NULL;
1369 int r;
1370
1371 assert(unit);
1372
1373 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1374 if (r < 0)
1375 return r;
1376
1377 return cg_path_get_unit(cgroup, unit);
1378 }
1379
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to what follows the session scope component (with the
 * separating slashes skipped), or NULL if p is empty, its first component is
 * not of the form "session-<id>.scope", or <id> is not a valid session id.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                char buf[n - 8 - 6 + 1];

                /* Copy out just the id between prefix and suffix. */
                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, so failure is NULL. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1416
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the user manager component (separating
 * slashes skipped), or NULL if p is empty, its first component is not of the
 * form "user@<uid>.service", or <uid> does not parse as a UID.
 */
static const char *skip_user_manager(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + n - 8, ".service", 8) != 0)
                return NULL;

        /* Copy out just the part between prefix and suffix. */
        char uid_str[n - 5 - 8 + 1];

        memcpy(uid_str, p + 5, n - 5 - 8);
        uid_str[n - 5 - 8] = 0;

        /* Note that user manager services never need unescaping,
         * since they cannot conflict with the kernel's own
         * names, hence we don't need to call cg_unescape()
         * here. */

        if (parse_uid(uid_str, NULL) < 0)
                return NULL;

        p += n;
        p += strspn(p, "/");

        return p;
}
1454
/* Skips the part of a cgroup path that leads up to a user unit: any slices,
 * followed by either a user manager service or, failing that, a session
 * scope. Returns a pointer to the remainder, or NULL if neither is there. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        after_slices = skip_slices(path);

        /* Prefer the user manager if it comes next... */
        after_manager = skip_user_manager(after_slices);
        if (after_manager)
                return after_manager;

        /* ... and otherwise expect a user session. */
        return skip_session(after_slices);
}
1471
/* Extracts the user unit name from a cgroup path. Returns -ENXIO if the path
 * does not carry the prefix skip_user_prefix() expects; the remainder is
 * parsed exactly like a system unit path by cg_path_get_unit(). */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);
        if (!rest)
                return -ENXIO;

        return cg_path_get_unit(rest, ret);
}
1487
1488 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1489 _cleanup_free_ char *cgroup = NULL;
1490 int r;
1491
1492 assert(unit);
1493
1494 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1495 if (r < 0)
1496 return r;
1497
1498 return cg_path_get_user_unit(cgroup, unit);
1499 }
1500
1501 int cg_path_get_machine_name(const char *path, char **machine) {
1502 _cleanup_free_ char *u = NULL;
1503 const char *sl;
1504 int r;
1505
1506 r = cg_path_get_unit(path, &u);
1507 if (r < 0)
1508 return r;
1509
1510 sl = strjoina("/run/systemd/machines/unit:", u);
1511 return readlink_malloc(sl, machine);
1512 }
1513
1514 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1515 _cleanup_free_ char *cgroup = NULL;
1516 int r;
1517
1518 assert(machine);
1519
1520 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1521 if (r < 0)
1522 return r;
1523
1524 return cg_path_get_machine_name(cgroup, machine);
1525 }
1526
1527 int cg_path_get_session(const char *path, char **session) {
1528 _cleanup_free_ char *unit = NULL;
1529 char *start, *end;
1530 int r;
1531
1532 assert(path);
1533
1534 r = cg_path_get_unit(path, &unit);
1535 if (r < 0)
1536 return r;
1537
1538 start = startswith(unit, "session-");
1539 if (!start)
1540 return -ENXIO;
1541 end = endswith(start, ".scope");
1542 if (!end)
1543 return -ENXIO;
1544
1545 *end = 0;
1546 if (!session_id_valid(start))
1547 return -ENXIO;
1548
1549 if (session) {
1550 char *rr;
1551
1552 rr = strdup(start);
1553 if (!rr)
1554 return -ENOMEM;
1555
1556 *session = rr;
1557 }
1558
1559 return 0;
1560 }
1561
1562 int cg_pid_get_session(pid_t pid, char **session) {
1563 _cleanup_free_ char *cgroup = NULL;
1564 int r;
1565
1566 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1567 if (r < 0)
1568 return r;
1569
1570 return cg_path_get_session(cgroup, session);
1571 }
1572
1573 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1574 _cleanup_free_ char *slice = NULL;
1575 char *start, *end;
1576 int r;
1577
1578 assert(path);
1579
1580 r = cg_path_get_slice(path, &slice);
1581 if (r < 0)
1582 return r;
1583
1584 start = startswith(slice, "user-");
1585 if (!start)
1586 return -ENXIO;
1587 end = endswith(start, ".slice");
1588 if (!end)
1589 return -ENXIO;
1590
1591 *end = 0;
1592 if (parse_uid(start, uid) < 0)
1593 return -ENXIO;
1594
1595 return 0;
1596 }
1597
1598 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1599 _cleanup_free_ char *cgroup = NULL;
1600 int r;
1601
1602 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1603 if (r < 0)
1604 return r;
1605
1606 return cg_path_get_owner_uid(cgroup, uid);
1607 }
1608
/* Finds the right-most slice unit in the run of slice components at the
 * beginning of the path, i.e. scanning stops at the first component that
 * is not a valid slice name. If the path does not begin with any slice,
 * a newly allocated "-.slice" is returned. Otherwise the last slice
 * component seen is decoded with cg_path_decode_unit(). */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last_slice = NULL;
        char *root;

        assert(p);
        assert(slice);

        for (;;) {
                size_t len;

                /* Jump over separators, then measure the next component */
                p += strspn(p, "/");
                len = strcspn(p, "/");

                if (!valid_slice_name(p, len))
                        break;

                last_slice = p;
                p += len;
        }

        if (last_slice)
                return cg_path_decode_unit(last_slice, slice);

        /* Not a single slice component in front */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1644
1645 int cg_pid_get_slice(pid_t pid, char **slice) {
1646 _cleanup_free_ char *cgroup = NULL;
1647 int r;
1648
1649 assert(slice);
1650
1651 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1652 if (r < 0)
1653 return r;
1654
1655 return cg_path_get_slice(cgroup, slice);
1656 }
1657
/* Extracts the user slice from a cgroup path: the user prefix is skipped
 * via skip_user_prefix() and the remainder is handed to
 * cg_path_get_slice(). Returns -ENXIO if no user prefix is found. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);
        if (rest)
                /* What follows the user prefix is parsed exactly like
                 * a system slice path */
                return cg_path_get_slice(rest, slice);

        return -ENXIO;
}
1671
1672 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1673 _cleanup_free_ char *cgroup = NULL;
1674 int r;
1675
1676 assert(slice);
1677
1678 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1679 if (r < 0)
1680 return r;
1681
1682 return cg_path_get_user_slice(cgroup, slice);
1683 }
1684
1685 char *cg_escape(const char *p) {
1686 bool need_prefix = false;
1687
1688 /* This implements very minimal escaping for names to be used
1689 * as file names in the cgroup tree: any name which might
1690 * conflict with a kernel name or is prefixed with '_' is
1691 * prefixed with a '_'. That way, when reading cgroup names it
1692 * is sufficient to remove a single prefixing underscore if
1693 * there is one. */
1694
1695 /* The return value of this function (unlike cg_unescape())
1696 * needs free()! */
1697
1698 if (p[0] == 0 ||
1699 p[0] == '_' ||
1700 p[0] == '.' ||
1701 streq(p, "notify_on_release") ||
1702 streq(p, "release_agent") ||
1703 streq(p, "tasks") ||
1704 startswith(p, "cgroup."))
1705 need_prefix = true;
1706 else {
1707 const char *dot;
1708
1709 dot = strrchr(p, '.');
1710 if (dot) {
1711 CGroupController c;
1712 size_t l = dot - p;
1713
1714 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1715 const char *n;
1716
1717 n = cgroup_controller_to_string(c);
1718
1719 if (l != strlen(n))
1720 continue;
1721
1722 if (memcmp(p, n, l) != 0)
1723 continue;
1724
1725 need_prefix = true;
1726 break;
1727 }
1728 }
1729 }
1730
1731 if (need_prefix)
1732 return strappend("_", p);
1733
1734 return strdup(p);
1735 }
1736
/* Reverses cg_escape(): if the name begins with '_' a pointer to the
 * character right after it is returned, otherwise the name itself.
 *
 * Unlike cg_escape(), the return value points into the passed string
 * and must not be passed to free()! */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1748
1749 #define CONTROLLER_VALID \
1750 DIGITS LETTERS \
1751 "_"
1752
1753 bool cg_controller_is_valid(const char *p) {
1754 const char *t, *s;
1755
1756 if (!p)
1757 return false;
1758
1759 s = startswith(p, "name=");
1760 if (s)
1761 p = s;
1762
1763 if (*p == 0 || *p == '_')
1764 return false;
1765
1766 for (t = p; *t; t++)
1767 if (!strchr(CONTROLLER_VALID, *t))
1768 return false;
1769
1770 if (t - p > FILENAME_MAX)
1771 return false;
1772
1773 return true;
1774 }
1775
1776 int cg_slice_to_path(const char *unit, char **ret) {
1777 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1778 const char *dash;
1779 int r;
1780
1781 assert(unit);
1782 assert(ret);
1783
1784 if (streq(unit, "-.slice")) {
1785 char *x;
1786
1787 x = strdup("");
1788 if (!x)
1789 return -ENOMEM;
1790 *ret = x;
1791 return 0;
1792 }
1793
1794 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1795 return -EINVAL;
1796
1797 if (!endswith(unit, ".slice"))
1798 return -EINVAL;
1799
1800 r = unit_name_to_prefix(unit, &p);
1801 if (r < 0)
1802 return r;
1803
1804 dash = strchr(p, '-');
1805
1806 /* Don't allow initial dashes */
1807 if (dash == p)
1808 return -EINVAL;
1809
1810 while (dash) {
1811 _cleanup_free_ char *escaped = NULL;
1812 char n[dash - p + sizeof(".slice")];
1813
1814 /* Don't allow trailing or double dashes */
1815 if (dash[1] == 0 || dash[1] == '-')
1816 return -EINVAL;
1817
1818 strcpy(stpncpy(n, p, dash - p), ".slice");
1819 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1820 return -EINVAL;
1821
1822 escaped = cg_escape(n);
1823 if (!escaped)
1824 return -ENOMEM;
1825
1826 if (!strextend(&s, escaped, "/", NULL))
1827 return -ENOMEM;
1828
1829 dash = strchr(dash+1, '-');
1830 }
1831
1832 e = cg_escape(unit);
1833 if (!e)
1834 return -ENOMEM;
1835
1836 if (!strextend(&s, e, NULL))
1837 return -ENOMEM;
1838
1839 *ret = s;
1840 s = NULL;
1841
1842 return 0;
1843 }
1844
1845 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1846 _cleanup_free_ char *p = NULL;
1847 int r;
1848
1849 r = cg_get_path(controller, path, attribute, &p);
1850 if (r < 0)
1851 return r;
1852
1853 return write_string_file(p, value, 0);
1854 }
1855
1856 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1857 _cleanup_free_ char *p = NULL;
1858 int r;
1859
1860 r = cg_get_path(controller, path, attribute, &p);
1861 if (r < 0)
1862 return r;
1863
1864 return read_one_line_file(p, ret);
1865 }
1866
1867 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1868 CGroupController c;
1869 int r, unified;
1870
1871 /* This one will create a cgroup in our private tree, but also
1872 * duplicate it in the trees specified in mask, and remove it
1873 * in all others */
1874
1875 /* First create the cgroup in our own hierarchy. */
1876 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1877 if (r < 0)
1878 return r;
1879
1880 /* If we are in the unified hierarchy, we are done now */
1881 unified = cg_unified();
1882 if (unified < 0)
1883 return unified;
1884 if (unified > 0)
1885 return 0;
1886
1887 /* Otherwise, do the same in the other hierarchies */
1888 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1889 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1890 const char *n;
1891
1892 n = cgroup_controller_to_string(c);
1893
1894 if (mask & bit)
1895 (void) cg_create(n, path);
1896 else if (supported & bit)
1897 (void) cg_trim(n, path, true);
1898 }
1899
1900 return 0;
1901 }
1902
1903 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1904 CGroupController c;
1905 int r, unified;
1906
1907 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1908 if (r < 0)
1909 return r;
1910
1911 unified = cg_unified();
1912 if (unified < 0)
1913 return unified;
1914 if (unified > 0)
1915 return 0;
1916
1917 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1918 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1919 const char *p = NULL;
1920
1921 if (!(supported & bit))
1922 continue;
1923
1924 if (path_callback)
1925 p = path_callback(bit, userdata);
1926
1927 if (!p)
1928 p = path;
1929
1930 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1931 }
1932
1933 return 0;
1934 }
1935
1936 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1937 Iterator i;
1938 void *pidp;
1939 int r = 0;
1940
1941 SET_FOREACH(pidp, pids, i) {
1942 pid_t pid = PTR_TO_PID(pidp);
1943 int q;
1944
1945 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1946 if (q < 0 && r >= 0)
1947 r = q;
1948 }
1949
1950 return r;
1951 }
1952
1953 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1954 CGroupController c;
1955 int r = 0, unified;
1956
1957 if (!path_equal(from, to)) {
1958 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1959 if (r < 0)
1960 return r;
1961 }
1962
1963 unified = cg_unified();
1964 if (unified < 0)
1965 return unified;
1966 if (unified > 0)
1967 return r;
1968
1969 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1970 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1971 const char *p = NULL;
1972
1973 if (!(supported & bit))
1974 continue;
1975
1976 if (to_callback)
1977 p = to_callback(bit, userdata);
1978
1979 if (!p)
1980 p = to;
1981
1982 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1983 }
1984
1985 return 0;
1986 }
1987
1988 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1989 CGroupController c;
1990 int r, unified;
1991
1992 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1993 if (r < 0)
1994 return r;
1995
1996 unified = cg_unified();
1997 if (unified < 0)
1998 return unified;
1999 if (unified > 0)
2000 return r;
2001
2002 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2003 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2004
2005 if (!(supported & bit))
2006 continue;
2007
2008 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2009 }
2010
2011 return 0;
2012 }
2013
2014 int cg_mask_supported(CGroupMask *ret) {
2015 CGroupMask mask = 0;
2016 int r, unified;
2017
2018 /* Determines the mask of supported cgroup controllers. Only
2019 * includes controllers we can make sense of and that are
2020 * actually accessible. */
2021
2022 unified = cg_unified();
2023 if (unified < 0)
2024 return unified;
2025 if (unified > 0) {
2026 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2027 const char *c;
2028
2029 /* In the unified hierarchy we can read the supported
2030 * and accessible controllers from a the top-level
2031 * cgroup attribute */
2032
2033 r = cg_get_root_path(&root);
2034 if (r < 0)
2035 return r;
2036
2037 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2038 if (r < 0)
2039 return r;
2040
2041 r = read_one_line_file(path, &controllers);
2042 if (r < 0)
2043 return r;
2044
2045 c = controllers;
2046 for (;;) {
2047 _cleanup_free_ char *n = NULL;
2048 CGroupController v;
2049
2050 r = extract_first_word(&c, &n, NULL, 0);
2051 if (r < 0)
2052 return r;
2053 if (r == 0)
2054 break;
2055
2056 v = cgroup_controller_from_string(n);
2057 if (v < 0)
2058 continue;
2059
2060 mask |= CGROUP_CONTROLLER_TO_MASK(v);
2061 }
2062
2063 /* Currently, we only support the memory, io and pids
2064 * controller in the unified hierarchy, mask
2065 * everything else off. */
2066 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2067
2068 } else {
2069 CGroupController c;
2070
2071 /* In the legacy hierarchy, we check whether which
2072 * hierarchies are mounted. */
2073
2074 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2075 const char *n;
2076
2077 n = cgroup_controller_to_string(c);
2078 if (controller_is_accessible(n) >= 0)
2079 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2080 }
2081 }
2082
2083 *ret = mask;
2084 return 0;
2085 }
2086
2087 int cg_kernel_controllers(Set *controllers) {
2088 _cleanup_fclose_ FILE *f = NULL;
2089 char buf[LINE_MAX];
2090 int r;
2091
2092 assert(controllers);
2093
2094 /* Determines the full list of kernel-known controllers. Might
2095 * include controllers we don't actually support, arbitrary
2096 * named hierarchies and controllers that aren't currently
2097 * accessible (because not mounted). */
2098
2099 f = fopen("/proc/cgroups", "re");
2100 if (!f) {
2101 if (errno == ENOENT)
2102 return 0;
2103 return -errno;
2104 }
2105
2106 /* Ignore the header line */
2107 (void) fgets(buf, sizeof(buf), f);
2108
2109 for (;;) {
2110 char *controller;
2111 int enabled = 0;
2112
2113 errno = 0;
2114 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2115
2116 if (feof(f))
2117 break;
2118
2119 if (ferror(f) && errno > 0)
2120 return -errno;
2121
2122 return -EBADMSG;
2123 }
2124
2125 if (!enabled) {
2126 free(controller);
2127 continue;
2128 }
2129
2130 if (!cg_controller_is_valid(controller)) {
2131 free(controller);
2132 return -EBADMSG;
2133 }
2134
2135 r = set_consume(controllers, controller);
2136 if (r < 0)
2137 return r;
2138 }
2139
2140 return 0;
2141 }
2142
2143 static thread_local int unified_cache = -1;
2144
2145 int cg_unified(void) {
2146 struct statfs fs;
2147
2148 /* Checks if we support the unified hierarchy. Returns an
2149 * error when the cgroup hierarchies aren't mounted yet or we
2150 * have any other trouble determining if the unified hierarchy
2151 * is supported. */
2152
2153 if (unified_cache >= 0)
2154 return unified_cache;
2155
2156 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2157 return -errno;
2158
2159 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC))
2160 unified_cache = true;
2161 else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2162 unified_cache = false;
2163 else
2164 return -ENOMEDIUM;
2165
2166 return unified_cache;
2167 }
2168
2169 void cg_unified_flush(void) {
2170 unified_cache = -1;
2171 }
2172
2173 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2174 _cleanup_free_ char *fs = NULL;
2175 CGroupController c;
2176 int r, unified;
2177
2178 assert(p);
2179
2180 if (supported == 0)
2181 return 0;
2182
2183 unified = cg_unified();
2184 if (unified < 0)
2185 return unified;
2186 if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2187 return 0;
2188
2189 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2190 if (r < 0)
2191 return r;
2192
2193 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2194 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2195 const char *n;
2196
2197 if (!(supported & bit))
2198 continue;
2199
2200 n = cgroup_controller_to_string(c);
2201 {
2202 char s[1 + strlen(n) + 1];
2203
2204 s[0] = mask & bit ? '+' : '-';
2205 strcpy(s + 1, n);
2206
2207 r = write_string_file(fs, s, 0);
2208 if (r < 0)
2209 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2210 }
2211 }
2212
2213 return 0;
2214 }
2215
2216 bool cg_is_unified_wanted(void) {
2217 static thread_local int wanted = -1;
2218 int r, unified;
2219
2220 /* If the hierarchy is already mounted, then follow whatever
2221 * was chosen for it. */
2222 unified = cg_unified();
2223 if (unified >= 0)
2224 return unified;
2225
2226 /* Otherwise, let's see what the kernel command line has to
2227 * say. Since checking that is expensive, let's cache the
2228 * result. */
2229 if (wanted >= 0)
2230 return wanted;
2231
2232 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2233 if (r > 0)
2234 return (wanted = true);
2235 else {
2236 _cleanup_free_ char *value = NULL;
2237
2238 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2239 if (r < 0)
2240 return false;
2241 if (r == 0)
2242 return (wanted = false);
2243
2244 return (wanted = parse_boolean(value) > 0);
2245 }
2246 }
2247
/* The exact opposite of cg_is_unified_wanted() */
bool cg_is_legacy_wanted(void) {
        bool unified_wanted;

        unified_wanted = cg_is_unified_wanted();

        return !unified_wanted;
}
2251
2252 int cg_weight_parse(const char *s, uint64_t *ret) {
2253 uint64_t u;
2254 int r;
2255
2256 if (isempty(s)) {
2257 *ret = CGROUP_WEIGHT_INVALID;
2258 return 0;
2259 }
2260
2261 r = safe_atou64(s, &u);
2262 if (r < 0)
2263 return r;
2264
2265 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2266 return -ERANGE;
2267
2268 *ret = u;
2269 return 0;
2270 }
2271
2272 const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2273 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2274 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
2275 };
2276
2277 static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2278 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2279 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
2280 };
2281
2282 DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2283
2284 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2285 uint64_t u;
2286 int r;
2287
2288 if (isempty(s)) {
2289 *ret = CGROUP_CPU_SHARES_INVALID;
2290 return 0;
2291 }
2292
2293 r = safe_atou64(s, &u);
2294 if (r < 0)
2295 return r;
2296
2297 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2298 return -ERANGE;
2299
2300 *ret = u;
2301 return 0;
2302 }
2303
2304 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2305 uint64_t u;
2306 int r;
2307
2308 if (isempty(s)) {
2309 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2310 return 0;
2311 }
2312
2313 r = safe_atou64(s, &u);
2314 if (r < 0)
2315 return r;
2316
2317 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2318 return -ERANGE;
2319
2320 *ret = u;
2321 return 0;
2322 }
2323
2324 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2325 [CGROUP_CONTROLLER_CPU] = "cpu",
2326 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2327 [CGROUP_CONTROLLER_IO] = "io",
2328 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2329 [CGROUP_CONTROLLER_MEMORY] = "memory",
2330 [CGROUP_CONTROLLER_DEVICES] = "devices",
2331 [CGROUP_CONTROLLER_PIDS] = "pids",
2332 };
2333
2334 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);