]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/cgroup-util.c
Merge pull request #3728 from poettering/dynamic-users
[thirdparty/systemd.git] / src / basic / cgroup-util.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <dirent.h>
21 #include <errno.h>
22 #include <ftw.h>
23 #include <limits.h>
24 #include <signal.h>
25 #include <stddef.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/stat.h>
29 #include <sys/statfs.h>
30 #include <sys/types.h>
31 #include <unistd.h>
32
33 #include "alloc-util.h"
34 #include "cgroup-util.h"
35 #include "def.h"
36 #include "dirent-util.h"
37 #include "extract-word.h"
38 #include "fd-util.h"
39 #include "fileio.h"
40 #include "formats-util.h"
41 #include "fs-util.h"
42 #include "log.h"
43 #include "login-util.h"
44 #include "macro.h"
45 #include "missing.h"
46 #include "mkdir.h"
47 #include "parse-util.h"
48 #include "path-util.h"
49 #include "proc-cmdline.h"
50 #include "process-util.h"
51 #include "set.h"
52 #include "special.h"
53 #include "stat-util.h"
54 #include "stdio-util.h"
55 #include "string-table.h"
56 #include "string-util.h"
57 #include "unit-name.h"
58 #include "user-util.h"
59
60 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
61 _cleanup_free_ char *fs = NULL;
62 FILE *f;
63 int r;
64
65 assert(_f);
66
67 r = cg_get_path(controller, path, "cgroup.procs", &fs);
68 if (r < 0)
69 return r;
70
71 f = fopen(fs, "re");
72 if (!f)
73 return -errno;
74
75 *_f = f;
76 return 0;
77 }
78
79 int cg_read_pid(FILE *f, pid_t *_pid) {
80 unsigned long ul;
81
82 /* Note that the cgroup.procs might contain duplicates! See
83 * cgroups.txt for details. */
84
85 assert(f);
86 assert(_pid);
87
88 errno = 0;
89 if (fscanf(f, "%lu", &ul) != 1) {
90
91 if (feof(f))
92 return 0;
93
94 return errno > 0 ? -errno : -EIO;
95 }
96
97 if (ul <= 0)
98 return -EIO;
99
100 *_pid = (pid_t) ul;
101 return 1;
102 }
103
104 int cg_read_event(const char *controller, const char *path, const char *event,
105 char **val)
106 {
107 _cleanup_free_ char *events = NULL, *content = NULL;
108 char *p, *line;
109 int r;
110
111 r = cg_get_path(controller, path, "cgroup.events", &events);
112 if (r < 0)
113 return r;
114
115 r = read_full_file(events, &content, NULL);
116 if (r < 0)
117 return r;
118
119 p = content;
120 while ((line = strsep(&p, "\n"))) {
121 char *key;
122
123 key = strsep(&line, " ");
124 if (!key || !line)
125 return -EINVAL;
126
127 if (strcmp(key, event))
128 continue;
129
130 *val = strdup(line);
131 return 0;
132 }
133
134 return -ENOENT;
135 }
136
/* Reports whether /proc/self/ns/cgroup exists, which is taken as the sign that cgroup
 * namespaces are available. The answer is determined once per thread and then cached. */
bool cg_ns_supported(void) {
        static thread_local int cached = -1;

        if (cached < 0)
                cached = access("/proc/self/ns/cgroup", F_OK) == 0 ? 1 : 0;

        return cached;
}
150
151 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
152 _cleanup_free_ char *fs = NULL;
153 int r;
154 DIR *d;
155
156 assert(_d);
157
158 /* This is not recursive! */
159
160 r = cg_get_path(controller, path, NULL, &fs);
161 if (r < 0)
162 return r;
163
164 d = opendir(fs);
165 if (!d)
166 return -errno;
167
168 *_d = d;
169 return 0;
170 }
171
172 int cg_read_subgroup(DIR *d, char **fn) {
173 struct dirent *de;
174
175 assert(d);
176 assert(fn);
177
178 FOREACH_DIRENT_ALL(de, d, return -errno) {
179 char *b;
180
181 if (de->d_type != DT_DIR)
182 continue;
183
184 if (streq(de->d_name, ".") ||
185 streq(de->d_name, ".."))
186 continue;
187
188 b = strdup(de->d_name);
189 if (!b)
190 return -ENOMEM;
191
192 *fn = b;
193 return 1;
194 }
195
196 return 0;
197 }
198
199 int cg_rmdir(const char *controller, const char *path) {
200 _cleanup_free_ char *p = NULL;
201 int r;
202
203 r = cg_get_path(controller, path, NULL, &p);
204 if (r < 0)
205 return r;
206
207 r = rmdir(p);
208 if (r < 0 && errno != ENOENT)
209 return -errno;
210
211 return 0;
212 }
213
214 int cg_kill(
215 const char *controller,
216 const char *path,
217 int sig,
218 CGroupFlags flags,
219 Set *s,
220 cg_kill_log_func_t log_kill,
221 void *userdata) {
222
223 _cleanup_set_free_ Set *allocated_set = NULL;
224 bool done = false;
225 int r, ret = 0;
226 pid_t my_pid;
227
228 assert(sig >= 0);
229
230 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
231 * SIGCONT on SIGKILL. */
232 if (IN_SET(sig, SIGCONT, SIGKILL))
233 flags &= ~CGROUP_SIGCONT;
234
235 /* This goes through the tasks list and kills them all. This
236 * is repeated until no further processes are added to the
237 * tasks list, to properly handle forking processes */
238
239 if (!s) {
240 s = allocated_set = set_new(NULL);
241 if (!s)
242 return -ENOMEM;
243 }
244
245 my_pid = getpid();
246
247 do {
248 _cleanup_fclose_ FILE *f = NULL;
249 pid_t pid = 0;
250 done = true;
251
252 r = cg_enumerate_processes(controller, path, &f);
253 if (r < 0) {
254 if (ret >= 0 && r != -ENOENT)
255 return r;
256
257 return ret;
258 }
259
260 while ((r = cg_read_pid(f, &pid)) > 0) {
261
262 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
263 continue;
264
265 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
266 continue;
267
268 if (log_kill)
269 log_kill(pid, sig, userdata);
270
271 /* If we haven't killed this process yet, kill
272 * it */
273 if (kill(pid, sig) < 0) {
274 if (ret >= 0 && errno != ESRCH)
275 ret = -errno;
276 } else {
277 if (flags & CGROUP_SIGCONT)
278 (void) kill(pid, SIGCONT);
279
280 if (ret == 0)
281 ret = 1;
282 }
283
284 done = false;
285
286 r = set_put(s, PID_TO_PTR(pid));
287 if (r < 0) {
288 if (ret >= 0)
289 return r;
290
291 return ret;
292 }
293 }
294
295 if (r < 0) {
296 if (ret >= 0)
297 return r;
298
299 return ret;
300 }
301
302 /* To avoid racing against processes which fork
303 * quicker than we can kill them we repeat this until
304 * no new pids need to be killed. */
305
306 } while (!done);
307
308 return ret;
309 }
310
311 int cg_kill_recursive(
312 const char *controller,
313 const char *path,
314 int sig,
315 CGroupFlags flags,
316 Set *s,
317 cg_kill_log_func_t log_kill,
318 void *userdata) {
319
320 _cleanup_set_free_ Set *allocated_set = NULL;
321 _cleanup_closedir_ DIR *d = NULL;
322 int r, ret;
323 char *fn;
324
325 assert(path);
326 assert(sig >= 0);
327
328 if (!s) {
329 s = allocated_set = set_new(NULL);
330 if (!s)
331 return -ENOMEM;
332 }
333
334 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
335
336 r = cg_enumerate_subgroups(controller, path, &d);
337 if (r < 0) {
338 if (ret >= 0 && r != -ENOENT)
339 return r;
340
341 return ret;
342 }
343
344 while ((r = cg_read_subgroup(d, &fn)) > 0) {
345 _cleanup_free_ char *p = NULL;
346
347 p = strjoin(path, "/", fn, NULL);
348 free(fn);
349 if (!p)
350 return -ENOMEM;
351
352 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
353 if (r != 0 && ret >= 0)
354 ret = r;
355 }
356 if (ret >= 0 && r < 0)
357 ret = r;
358
359 if (flags & CGROUP_REMOVE) {
360 r = cg_rmdir(controller, path);
361 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
362 return r;
363 }
364
365 return ret;
366 }
367
368 int cg_migrate(
369 const char *cfrom,
370 const char *pfrom,
371 const char *cto,
372 const char *pto,
373 CGroupFlags flags) {
374
375 bool done = false;
376 _cleanup_set_free_ Set *s = NULL;
377 int r, ret = 0;
378 pid_t my_pid;
379
380 assert(cfrom);
381 assert(pfrom);
382 assert(cto);
383 assert(pto);
384
385 s = set_new(NULL);
386 if (!s)
387 return -ENOMEM;
388
389 my_pid = getpid();
390
391 do {
392 _cleanup_fclose_ FILE *f = NULL;
393 pid_t pid = 0;
394 done = true;
395
396 r = cg_enumerate_processes(cfrom, pfrom, &f);
397 if (r < 0) {
398 if (ret >= 0 && r != -ENOENT)
399 return r;
400
401 return ret;
402 }
403
404 while ((r = cg_read_pid(f, &pid)) > 0) {
405
406 /* This might do weird stuff if we aren't a
407 * single-threaded program. However, we
408 * luckily know we are not */
409 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
410 continue;
411
412 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
413 continue;
414
415 /* Ignore kernel threads. Since they can only
416 * exist in the root cgroup, we only check for
417 * them there. */
418 if (cfrom &&
419 (isempty(pfrom) || path_equal(pfrom, "/")) &&
420 is_kernel_thread(pid) > 0)
421 continue;
422
423 r = cg_attach(cto, pto, pid);
424 if (r < 0) {
425 if (ret >= 0 && r != -ESRCH)
426 ret = r;
427 } else if (ret == 0)
428 ret = 1;
429
430 done = false;
431
432 r = set_put(s, PID_TO_PTR(pid));
433 if (r < 0) {
434 if (ret >= 0)
435 return r;
436
437 return ret;
438 }
439 }
440
441 if (r < 0) {
442 if (ret >= 0)
443 return r;
444
445 return ret;
446 }
447 } while (!done);
448
449 return ret;
450 }
451
452 int cg_migrate_recursive(
453 const char *cfrom,
454 const char *pfrom,
455 const char *cto,
456 const char *pto,
457 CGroupFlags flags) {
458
459 _cleanup_closedir_ DIR *d = NULL;
460 int r, ret = 0;
461 char *fn;
462
463 assert(cfrom);
464 assert(pfrom);
465 assert(cto);
466 assert(pto);
467
468 ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
469
470 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
471 if (r < 0) {
472 if (ret >= 0 && r != -ENOENT)
473 return r;
474
475 return ret;
476 }
477
478 while ((r = cg_read_subgroup(d, &fn)) > 0) {
479 _cleanup_free_ char *p = NULL;
480
481 p = strjoin(pfrom, "/", fn, NULL);
482 free(fn);
483 if (!p)
484 return -ENOMEM;
485
486 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
487 if (r != 0 && ret >= 0)
488 ret = r;
489 }
490
491 if (r < 0 && ret >= 0)
492 ret = r;
493
494 if (flags & CGROUP_REMOVE) {
495 r = cg_rmdir(cfrom, pfrom);
496 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
497 return r;
498 }
499
500 return ret;
501 }
502
503 int cg_migrate_recursive_fallback(
504 const char *cfrom,
505 const char *pfrom,
506 const char *cto,
507 const char *pto,
508 CGroupFlags flags) {
509
510 int r;
511
512 assert(cfrom);
513 assert(pfrom);
514 assert(cto);
515 assert(pto);
516
517 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
518 if (r < 0) {
519 char prefix[strlen(pto) + 1];
520
521 /* This didn't work? Then let's try all prefixes of the destination */
522
523 PATH_FOREACH_PREFIX(prefix, pto) {
524 int q;
525
526 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
527 if (q >= 0)
528 return q;
529 }
530 }
531
532 return r;
533 }
534
/* Maps a controller name to the directory name used for it below /sys/fs/cgroup/.
 * In effect this just strips the "name=" prefix that marks named hierarchies, if present. */
static const char *controller_to_dirname(const char *controller) {
        const char *stripped;

        assert(controller);

        stripped = startswith(controller, "name=");

        return stripped ? stripped : controller;
}
551
552 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
553 const char *dn;
554 char *t = NULL;
555
556 assert(fs);
557 assert(controller);
558
559 dn = controller_to_dirname(controller);
560
561 if (isempty(path) && isempty(suffix))
562 t = strappend("/sys/fs/cgroup/", dn);
563 else if (isempty(path))
564 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
565 else if (isempty(suffix))
566 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
567 else
568 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
569 if (!t)
570 return -ENOMEM;
571
572 *fs = t;
573 return 0;
574 }
575
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]" for the unified hierarchy, leaving out the
 * empty components. Stores the newly allocated string in *fs and returns 0, or returns
 * -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        bool have_path, have_suffix;
        char *joined;

        assert(fs);

        have_path = !isempty(path);
        have_suffix = !isempty(suffix);

        if (have_path && have_suffix)
                joined = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
        else if (have_path)
                joined = strappend("/sys/fs/cgroup/", path);
        else if (have_suffix)
                joined = strappend("/sys/fs/cgroup/", suffix);
        else
                joined = strdup("/sys/fs/cgroup");

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
595
/* Computes the file system path for a cgroup (and optionally an attribute 'suffix' in it).
 *
 * Without a controller the result is just path and suffix joined, i.e. the path *below*
 * the controller mount points, without any prefix. With a controller the result lives
 * below /sys/fs/cgroup, laid out according to whether the unified hierarchy is in use.
 *
 * Stores a newly allocated string in *fs and returns 0, or returns a negative
 * errno-style code. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int unified, r;

        assert(fs);

        if (!controller) {
                char *joined;

                if (path && suffix)
                        joined = strjoin(path, "/", suffix, NULL);
                else if (path)
                        joined = strdup(path);
                else if (suffix)
                        joined = strdup(suffix);
                else
                        return -EINVAL;

                if (!joined)
                        return -ENOMEM;

                *fs = path_kill_slashes(joined);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        unified = cg_unified();
        if (unified < 0)
                return unified;

        r = unified > 0
                ? join_path_unified(path, suffix, fs)
                : join_path_legacy(controller, path, suffix, fs);
        if (r < 0)
                return r;

        path_kill_slashes(*fs);
        return 0;
}
640
641 static int controller_is_accessible(const char *controller) {
642 int unified;
643
644 assert(controller);
645
646 /* Checks whether a specific controller is accessible,
647 * i.e. its hierarchy mounted. In the unified hierarchy all
648 * controllers are considered accessible, except for the named
649 * hierarchies */
650
651 if (!cg_controller_is_valid(controller))
652 return -EINVAL;
653
654 unified = cg_unified();
655 if (unified < 0)
656 return unified;
657 if (unified > 0) {
658 /* We don't support named hierarchies if we are using
659 * the unified hierarchy. */
660
661 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
662 return 0;
663
664 if (startswith(controller, "name="))
665 return -EOPNOTSUPP;
666
667 } else {
668 const char *cc, *dn;
669
670 dn = controller_to_dirname(controller);
671 cc = strjoina("/sys/fs/cgroup/", dn);
672
673 if (laccess(cc, F_OK) < 0)
674 return -errno;
675 }
676
677 return 0;
678 }
679
/* Same as cg_get_path(), but first makes sure that the given controller is accessible
 * and fails with the error from that check if it is not. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
693
694 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
695 assert(path);
696 assert(sb);
697 assert(ftwbuf);
698
699 if (typeflag != FTW_DP)
700 return 0;
701
702 if (ftwbuf->level < 1)
703 return 0;
704
705 (void) rmdir(path);
706 return 0;
707 }
708
709 int cg_trim(const char *controller, const char *path, bool delete_root) {
710 _cleanup_free_ char *fs = NULL;
711 int r = 0;
712
713 assert(path);
714
715 r = cg_get_path(controller, path, NULL, &fs);
716 if (r < 0)
717 return r;
718
719 errno = 0;
720 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
721 if (errno == ENOENT)
722 r = 0;
723 else if (errno > 0)
724 r = -errno;
725 else
726 r = -EIO;
727 }
728
729 if (delete_root) {
730 if (rmdir(fs) < 0 && errno != ENOENT)
731 return -errno;
732 }
733
734 return r;
735 }
736
737 int cg_create(const char *controller, const char *path) {
738 _cleanup_free_ char *fs = NULL;
739 int r;
740
741 r = cg_get_path_and_check(controller, path, NULL, &fs);
742 if (r < 0)
743 return r;
744
745 r = mkdir_parents(fs, 0755);
746 if (r < 0)
747 return r;
748
749 if (mkdir(fs, 0755) < 0) {
750
751 if (errno == EEXIST)
752 return 0;
753
754 return -errno;
755 }
756
757 return 1;
758 }
759
/* Creates the given cgroup and attaches 'pid' to it. On success the result of
 * cg_create() is passed through. Note that a cgroup that was created is not removed
 * again if attaching fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);

        return attached < 0 ? attached : created;
}
776
777 int cg_attach(const char *controller, const char *path, pid_t pid) {
778 _cleanup_free_ char *fs = NULL;
779 char c[DECIMAL_STR_MAX(pid_t) + 2];
780 int r;
781
782 assert(path);
783 assert(pid >= 0);
784
785 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
786 if (r < 0)
787 return r;
788
789 if (pid == 0)
790 pid = getpid();
791
792 xsprintf(c, PID_FMT "\n", pid);
793
794 return write_string_file(fs, c, 0);
795 }
796
797 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
798 int r;
799
800 assert(controller);
801 assert(path);
802 assert(pid >= 0);
803
804 r = cg_attach(controller, path, pid);
805 if (r < 0) {
806 char prefix[strlen(path) + 1];
807
808 /* This didn't work? Then let's try all prefixes of
809 * the destination */
810
811 PATH_FOREACH_PREFIX(prefix, path) {
812 int q;
813
814 q = cg_attach(controller, prefix, pid);
815 if (q >= 0)
816 return q;
817 }
818 }
819
820 return r;
821 }
822
823 int cg_set_group_access(
824 const char *controller,
825 const char *path,
826 mode_t mode,
827 uid_t uid,
828 gid_t gid) {
829
830 _cleanup_free_ char *fs = NULL;
831 int r;
832
833 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
834 return 0;
835
836 if (mode != MODE_INVALID)
837 mode &= 0777;
838
839 r = cg_get_path(controller, path, NULL, &fs);
840 if (r < 0)
841 return r;
842
843 return chmod_and_chown(fs, mode, uid, gid);
844 }
845
846 int cg_set_task_access(
847 const char *controller,
848 const char *path,
849 mode_t mode,
850 uid_t uid,
851 gid_t gid) {
852
853 _cleanup_free_ char *fs = NULL, *procs = NULL;
854 int r, unified;
855
856 assert(path);
857
858 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
859 return 0;
860
861 if (mode != MODE_INVALID)
862 mode &= 0666;
863
864 r = cg_get_path(controller, path, "cgroup.procs", &fs);
865 if (r < 0)
866 return r;
867
868 r = chmod_and_chown(fs, mode, uid, gid);
869 if (r < 0)
870 return r;
871
872 unified = cg_unified();
873 if (unified < 0)
874 return unified;
875 if (unified)
876 return 0;
877
878 /* Compatibility, Always keep values for "tasks" in sync with
879 * "cgroup.procs" */
880 if (cg_get_path(controller, path, "tasks", &procs) >= 0)
881 (void) chmod_and_chown(procs, mode, uid, gid);
882
883 return 0;
884 }
885
886 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
887 _cleanup_fclose_ FILE *f = NULL;
888 char line[LINE_MAX];
889 const char *fs;
890 size_t cs = 0;
891 int unified;
892
893 assert(path);
894 assert(pid >= 0);
895
896 unified = cg_unified();
897 if (unified < 0)
898 return unified;
899 if (unified == 0) {
900 if (controller) {
901 if (!cg_controller_is_valid(controller))
902 return -EINVAL;
903 } else
904 controller = SYSTEMD_CGROUP_CONTROLLER;
905
906 cs = strlen(controller);
907 }
908
909 fs = procfs_file_alloca(pid, "cgroup");
910 f = fopen(fs, "re");
911 if (!f)
912 return errno == ENOENT ? -ESRCH : -errno;
913
914 FOREACH_LINE(line, f, return -errno) {
915 char *e, *p;
916
917 truncate_nl(line);
918
919 if (unified) {
920 e = startswith(line, "0:");
921 if (!e)
922 continue;
923
924 e = strchr(e, ':');
925 if (!e)
926 continue;
927 } else {
928 char *l;
929 size_t k;
930 const char *word, *state;
931 bool found = false;
932
933 l = strchr(line, ':');
934 if (!l)
935 continue;
936
937 l++;
938 e = strchr(l, ':');
939 if (!e)
940 continue;
941
942 *e = 0;
943 FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
944 if (k == cs && memcmp(word, controller, cs) == 0) {
945 found = true;
946 break;
947 }
948 }
949
950 if (!found)
951 continue;
952 }
953
954 p = strdup(e + 1);
955 if (!p)
956 return -ENOMEM;
957
958 *path = p;
959 return 0;
960 }
961
962 return -ENODATA;
963 }
964
965 int cg_install_release_agent(const char *controller, const char *agent) {
966 _cleanup_free_ char *fs = NULL, *contents = NULL;
967 const char *sc;
968 int r, unified;
969
970 assert(agent);
971
972 unified = cg_unified();
973 if (unified < 0)
974 return unified;
975 if (unified) /* doesn't apply to unified hierarchy */
976 return -EOPNOTSUPP;
977
978 r = cg_get_path(controller, NULL, "release_agent", &fs);
979 if (r < 0)
980 return r;
981
982 r = read_one_line_file(fs, &contents);
983 if (r < 0)
984 return r;
985
986 sc = strstrip(contents);
987 if (isempty(sc)) {
988 r = write_string_file(fs, agent, 0);
989 if (r < 0)
990 return r;
991 } else if (!path_equal(sc, agent))
992 return -EEXIST;
993
994 fs = mfree(fs);
995 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
996 if (r < 0)
997 return r;
998
999 contents = mfree(contents);
1000 r = read_one_line_file(fs, &contents);
1001 if (r < 0)
1002 return r;
1003
1004 sc = strstrip(contents);
1005 if (streq(sc, "0")) {
1006 r = write_string_file(fs, "1", 0);
1007 if (r < 0)
1008 return r;
1009
1010 return 1;
1011 }
1012
1013 if (!streq(sc, "1"))
1014 return -EIO;
1015
1016 return 0;
1017 }
1018
1019 int cg_uninstall_release_agent(const char *controller) {
1020 _cleanup_free_ char *fs = NULL;
1021 int r, unified;
1022
1023 unified = cg_unified();
1024 if (unified < 0)
1025 return unified;
1026 if (unified) /* Doesn't apply to unified hierarchy */
1027 return -EOPNOTSUPP;
1028
1029 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1030 if (r < 0)
1031 return r;
1032
1033 r = write_string_file(fs, "0", 0);
1034 if (r < 0)
1035 return r;
1036
1037 fs = mfree(fs);
1038
1039 r = cg_get_path(controller, NULL, "release_agent", &fs);
1040 if (r < 0)
1041 return r;
1042
1043 r = write_string_file(fs, "", 0);
1044 if (r < 0)
1045 return r;
1046
1047 return 0;
1048 }
1049
1050 int cg_is_empty(const char *controller, const char *path) {
1051 _cleanup_fclose_ FILE *f = NULL;
1052 pid_t pid;
1053 int r;
1054
1055 assert(path);
1056
1057 r = cg_enumerate_processes(controller, path, &f);
1058 if (r == -ENOENT)
1059 return 1;
1060 if (r < 0)
1061 return r;
1062
1063 r = cg_read_pid(f, &pid);
1064 if (r < 0)
1065 return r;
1066
1067 return r == 0;
1068 }
1069
1070 int cg_is_empty_recursive(const char *controller, const char *path) {
1071 int unified, r;
1072
1073 assert(path);
1074
1075 /* The root cgroup is always populated */
1076 if (controller && (isempty(path) || path_equal(path, "/")))
1077 return false;
1078
1079 unified = cg_unified();
1080 if (unified < 0)
1081 return unified;
1082
1083 if (unified > 0) {
1084 _cleanup_free_ char *t = NULL;
1085
1086 /* On the unified hierarchy we can check empty state
1087 * via the "populated" attribute of "cgroup.events". */
1088
1089 r = cg_read_event(controller, path, "populated", &t);
1090 if (r < 0)
1091 return r;
1092
1093 return streq(t, "0");
1094 } else {
1095 _cleanup_closedir_ DIR *d = NULL;
1096 char *fn;
1097
1098 r = cg_is_empty(controller, path);
1099 if (r <= 0)
1100 return r;
1101
1102 r = cg_enumerate_subgroups(controller, path, &d);
1103 if (r == -ENOENT)
1104 return 1;
1105 if (r < 0)
1106 return r;
1107
1108 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1109 _cleanup_free_ char *p = NULL;
1110
1111 p = strjoin(path, "/", fn, NULL);
1112 free(fn);
1113 if (!p)
1114 return -ENOMEM;
1115
1116 r = cg_is_empty_recursive(controller, p);
1117 if (r <= 0)
1118 return r;
1119 }
1120 if (r < 0)
1121 return r;
1122
1123 return true;
1124 }
1125 }
1126
/* Splits a cgroup specification into its controller and path parts. Three forms are
 * understood: an absolute path ("/foo"), a bare controller name ("cpu"), and the two
 * combined with a colon ("cpu:/foo"). Either output pointer may be NULL if the caller
 * does not care about that part; a part missing from the spec is returned as NULL.
 * Returned strings are newly allocated. Returns 0, -EINVAL on malformed input, or
 * -ENOMEM. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *ctl = NULL, *sub = NULL;
        const char *colon;
        int r;

        assert(spec);

        /* Form 1: just a path */
        if (*spec == '/') {
                if (!path_is_safe(spec))
                        return -EINVAL;

                if (path) {
                        sub = strdup(spec);
                        if (!sub)
                                return -ENOMEM;

                        *path = path_kill_slashes(sub);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        /* Form 2: just a controller */
        colon = strchr(spec, ':');
        if (!colon) {
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        ctl = strdup(spec);
                        if (!ctl)
                                return -ENOMEM;

                        *controller = ctl;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Form 3: controller, colon, and optionally a path */
        ctl = strndup(spec, colon - spec);
        if (!ctl)
                return -ENOMEM;

        if (!cg_controller_is_valid(ctl)) {
                r = -EINVAL;
                goto fail;
        }

        if (!isempty(colon + 1)) {
                sub = strdup(colon + 1);
                if (!sub) {
                        r = -ENOMEM;
                        goto fail;
                }

                if (!path_is_safe(sub) || !path_is_absolute(sub)) {
                        r = -EINVAL;
                        goto fail;
                }

                path_kill_slashes(sub);
        }

        /* Hand over what the caller asked for, release the rest */
        if (controller)
                *controller = ctl;
        else
                free(ctl);

        if (path)
                *path = sub;
        else
                free(sub);

        return 0;

fail:
        free(ctl);
        free(sub);
        return r;
}
1209
1210 int cg_mangle_path(const char *path, char **result) {
1211 _cleanup_free_ char *c = NULL, *p = NULL;
1212 char *t;
1213 int r;
1214
1215 assert(path);
1216 assert(result);
1217
1218 /* First, check if it already is a filesystem path */
1219 if (path_startswith(path, "/sys/fs/cgroup")) {
1220
1221 t = strdup(path);
1222 if (!t)
1223 return -ENOMEM;
1224
1225 *result = path_kill_slashes(t);
1226 return 0;
1227 }
1228
1229 /* Otherwise, treat it as cg spec */
1230 r = cg_split_spec(path, &c, &p);
1231 if (r < 0)
1232 return r;
1233
1234 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1235 }
1236
1237 int cg_get_root_path(char **path) {
1238 char *p, *e;
1239 int r;
1240
1241 assert(path);
1242
1243 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1244 if (r < 0)
1245 return r;
1246
1247 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1248 if (!e)
1249 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1250 if (!e)
1251 e = endswith(p, "/system"); /* even more legacy */
1252 if (e)
1253 *e = 0;
1254
1255 *path = p;
1256 return 0;
1257 }
1258
1259 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1260 _cleanup_free_ char *rt = NULL;
1261 char *p;
1262 int r;
1263
1264 assert(cgroup);
1265 assert(shifted);
1266
1267 if (!root) {
1268 /* If the root was specified let's use that, otherwise
1269 * let's determine it from PID 1 */
1270
1271 r = cg_get_root_path(&rt);
1272 if (r < 0)
1273 return r;
1274
1275 root = rt;
1276 }
1277
1278 p = path_startswith(cgroup, root);
1279 if (p && p > cgroup)
1280 *shifted = p - 1;
1281 else
1282 *shifted = cgroup;
1283
1284 return 0;
1285 }
1286
1287 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1288 _cleanup_free_ char *raw = NULL;
1289 const char *c;
1290 int r;
1291
1292 assert(pid >= 0);
1293 assert(cgroup);
1294
1295 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1296 if (r < 0)
1297 return r;
1298
1299 r = cg_shift_path(raw, root, &c);
1300 if (r < 0)
1301 return r;
1302
1303 if (c == raw) {
1304 *cgroup = raw;
1305 raw = NULL;
1306 } else {
1307 char *n;
1308
1309 n = strdup(c);
1310 if (!n)
1311 return -ENOMEM;
1312
1313 *cgroup = n;
1314 }
1315
1316 return 0;
1317 }
1318
1319 int cg_path_decode_unit(const char *cgroup, char **unit) {
1320 char *c, *s;
1321 size_t n;
1322
1323 assert(cgroup);
1324 assert(unit);
1325
1326 n = strcspn(cgroup, "/");
1327 if (n < 3)
1328 return -ENXIO;
1329
1330 c = strndupa(cgroup, n);
1331 c = cg_unescape(c);
1332
1333 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1334 return -ENXIO;
1335
1336 s = strdup(c);
1337 if (!s)
1338 return -ENOMEM;
1339
1340 *unit = s;
1341 return 0;
1342 }
1343
1344 static bool valid_slice_name(const char *p, size_t n) {
1345
1346 if (!p)
1347 return false;
1348
1349 if (n < strlen("x.slice"))
1350 return false;
1351
1352 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1353 char buf[n+1], *c;
1354
1355 memcpy(buf, p, n);
1356 buf[n] = 0;
1357
1358 c = cg_unescape(buf);
1359
1360 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1361 }
1362
1363 return false;
1364 }
1365
/* Skips over all leading slice components of p, along with the slashes in front of each.
 * Returns a pointer to the first component that is not a valid slice name. */
static const char *skip_slices(const char *p) {
        bool is_slice;
        size_t len;

        assert(p);

        do {
                p += strspn(p, "/");
                len = strcspn(p, "/");

                is_slice = valid_slice_name(p, len);
                if (is_slice)
                        p += len;
        } while (is_slice);

        return p;
}
1383
/* Extracts the unit name from a cgroup path: leading slices are skipped and the component
 * after them is decoded as unit name. Returns -ENXIO if that component turns out to be a
 * slice itself. The result is newly allocated. */
int cg_path_get_unit(const char *path, char **ret) {
        char *name;
        int r;

        assert(path);
        assert(ret);

        r = cg_path_decode_unit(skip_slices(path), &name);
        if (r < 0)
                return r;

        /* All slices have been skipped above, so one showing up here is not acceptable */
        if (!endswith(name, ".slice")) {
                *ret = name;
                return 0;
        }

        free(name);
        return -ENXIO;
}
1407
1408 int cg_pid_get_unit(pid_t pid, char **unit) {
1409 _cleanup_free_ char *cgroup = NULL;
1410 int r;
1411
1412 assert(unit);
1413
1414 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1415 if (r < 0)
1416 return r;
1417
1418 return cg_path_get_unit(cgroup, unit);
1419 }
1420
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to what follows the "session-<id>.scope" component (and the slashes
 * after it), or NULL if p is empty, its first component is not of that form, or <id> is
 * not a valid session id.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                char buf[n - 8 - 6 + 1];

                /* Extract the session id between prefix and suffix */
                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure with NULL rather
                 * than the boolean "false" */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1457
/**
 * Skip user@*.service, but require it to be there.
 *
 * Yields a pointer to what follows the "user@<uid>.service" component (and the slashes
 * after it), or NULL if p is empty, its first component is not of that form, or <uid>
 * does not parse as a UID.
 */
static const char *skip_user_manager(const char *p) {
        const char *component;
        size_t len;

        if (isempty(p))
                return NULL;

        component = p + strspn(p, "/");
        len = strcspn(component, "/");

        if (len >= strlen("user@x.service") &&
            memcmp(component, "user@", 5) == 0 &&
            memcmp(component + len - 8, ".service", 8) == 0) {
                char uid_text[len - 5 - 8 + 1];

                /* Extract what sits between "user@" and ".service" */
                memcpy(uid_text, component + 5, len - 5 - 8);
                uid_text[len - 5 - 8] = 0;

                /* User manager services never need unescaping, since their names
                 * cannot conflict with the kernel's own, hence there is no need to
                 * call cg_unescape() here. */

                if (parse_uid(uid_text, NULL) < 0)
                        return NULL;

                component += len;
                return component + strspn(component, "/");
        }

        return NULL;
}
1495
/* Skips everything in 'path' that precedes a user unit: any slices, followed by either a
 * user manager service or a session scope. Returns NULL if neither of the two follows. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        after_slices = skip_slices(path);

        /* Prefer the user manager if it is in the path, fall back to the user session */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1512
/* Extracts the user unit name from a cgroup path. Returns -ENXIO if the path does not
 * carry the prefix that skip_user_prefix() requires. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the prefix a user unit looks just like a system unit, hence the same
         * parser is used for it. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1528
1529 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1530 _cleanup_free_ char *cgroup = NULL;
1531 int r;
1532
1533 assert(unit);
1534
1535 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1536 if (r < 0)
1537 return r;
1538
1539 return cg_path_get_user_unit(cgroup, unit);
1540 }
1541
1542 int cg_path_get_machine_name(const char *path, char **machine) {
1543 _cleanup_free_ char *u = NULL;
1544 const char *sl;
1545 int r;
1546
1547 r = cg_path_get_unit(path, &u);
1548 if (r < 0)
1549 return r;
1550
1551 sl = strjoina("/run/systemd/machines/unit:", u);
1552 return readlink_malloc(sl, machine);
1553 }
1554
1555 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1556 _cleanup_free_ char *cgroup = NULL;
1557 int r;
1558
1559 assert(machine);
1560
1561 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1562 if (r < 0)
1563 return r;
1564
1565 return cg_path_get_machine_name(cgroup, machine);
1566 }
1567
1568 int cg_path_get_session(const char *path, char **session) {
1569 _cleanup_free_ char *unit = NULL;
1570 char *start, *end;
1571 int r;
1572
1573 assert(path);
1574
1575 r = cg_path_get_unit(path, &unit);
1576 if (r < 0)
1577 return r;
1578
1579 start = startswith(unit, "session-");
1580 if (!start)
1581 return -ENXIO;
1582 end = endswith(start, ".scope");
1583 if (!end)
1584 return -ENXIO;
1585
1586 *end = 0;
1587 if (!session_id_valid(start))
1588 return -ENXIO;
1589
1590 if (session) {
1591 char *rr;
1592
1593 rr = strdup(start);
1594 if (!rr)
1595 return -ENOMEM;
1596
1597 *session = rr;
1598 }
1599
1600 return 0;
1601 }
1602
1603 int cg_pid_get_session(pid_t pid, char **session) {
1604 _cleanup_free_ char *cgroup = NULL;
1605 int r;
1606
1607 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1608 if (r < 0)
1609 return r;
1610
1611 return cg_path_get_session(cgroup, session);
1612 }
1613
1614 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1615 _cleanup_free_ char *slice = NULL;
1616 char *start, *end;
1617 int r;
1618
1619 assert(path);
1620
1621 r = cg_path_get_slice(path, &slice);
1622 if (r < 0)
1623 return r;
1624
1625 start = startswith(slice, "user-");
1626 if (!start)
1627 return -ENXIO;
1628 end = endswith(start, ".slice");
1629 if (!end)
1630 return -ENXIO;
1631
1632 *end = 0;
1633 if (parse_uid(start, uid) < 0)
1634 return -ENXIO;
1635
1636 return 0;
1637 }
1638
1639 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1640 _cleanup_free_ char *cgroup = NULL;
1641 int r;
1642
1643 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1644 if (r < 0)
1645 return r;
1646
1647 return cg_path_get_owner_uid(cgroup, uid);
1648 }
1649
/* Finds the right-most slice unit counting from the beginning of the path,
 * stopping before the first component that is not a valid slice name.
 *
 * If no leading component is a slice, *slice is set to a newly allocated
 * "-.slice". Otherwise the last slice component seen is decoded with
 * cg_path_decode_unit(). Returns 0 or a negative error (-ENOMEM, or whatever
 * cg_path_decode_unit() returns). */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last_slice = NULL;
        char *root;

        assert(p);
        assert(slice);

        for (;;) {
                size_t len;

                p += strspn(p, "/");
                len = strcspn(p, "/");

                if (!valid_slice_name(p, len))
                        break;

                last_slice = p;
                p += len;
        }

        if (last_slice)
                return cg_path_decode_unit(last_slice, slice);

        /* Not a single slice in the path: report the root slice */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1685
1686 int cg_pid_get_slice(pid_t pid, char **slice) {
1687 _cleanup_free_ char *cgroup = NULL;
1688 int r;
1689
1690 assert(slice);
1691
1692 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1693 if (r < 0)
1694 return r;
1695
1696 return cg_path_get_slice(cgroup, slice);
1697 }
1698
/* Extracts the user-level slice a cgroup path belongs to. Returns -ENXIO if
 * the path has no user prefix (see skip_user_prefix()); otherwise the result
 * of cg_path_get_slice() on the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);
        if (rest)
                /* What is left looks pretty much like a system slice path,
                 * hence the system slice parser can take over from here. */
                return cg_path_get_slice(rest, slice);

        return -ENXIO;
}
1712
1713 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1714 _cleanup_free_ char *cgroup = NULL;
1715 int r;
1716
1717 assert(slice);
1718
1719 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1720 if (r < 0)
1721 return r;
1722
1723 return cg_path_get_user_slice(cgroup, slice);
1724 }
1725
1726 char *cg_escape(const char *p) {
1727 bool need_prefix = false;
1728
1729 /* This implements very minimal escaping for names to be used
1730 * as file names in the cgroup tree: any name which might
1731 * conflict with a kernel name or is prefixed with '_' is
1732 * prefixed with a '_'. That way, when reading cgroup names it
1733 * is sufficient to remove a single prefixing underscore if
1734 * there is one. */
1735
1736 /* The return value of this function (unlike cg_unescape())
1737 * needs free()! */
1738
1739 if (p[0] == 0 ||
1740 p[0] == '_' ||
1741 p[0] == '.' ||
1742 streq(p, "notify_on_release") ||
1743 streq(p, "release_agent") ||
1744 streq(p, "tasks") ||
1745 startswith(p, "cgroup."))
1746 need_prefix = true;
1747 else {
1748 const char *dot;
1749
1750 dot = strrchr(p, '.');
1751 if (dot) {
1752 CGroupController c;
1753 size_t l = dot - p;
1754
1755 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1756 const char *n;
1757
1758 n = cgroup_controller_to_string(c);
1759
1760 if (l != strlen(n))
1761 continue;
1762
1763 if (memcmp(p, n, l) != 0)
1764 continue;
1765
1766 need_prefix = true;
1767 break;
1768 }
1769 }
1770 }
1771
1772 if (need_prefix)
1773 return strappend("_", p);
1774
1775 return strdup(p);
1776 }
1777
/* Reverses cg_escape(): skips a single leading '_' if there is one.
 *
 * The return value of this function (unlike cg_escape()) doesn't need
 * free()! It points into the string passed in. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1789
1790 #define CONTROLLER_VALID \
1791 DIGITS LETTERS \
1792 "_"
1793
1794 bool cg_controller_is_valid(const char *p) {
1795 const char *t, *s;
1796
1797 if (!p)
1798 return false;
1799
1800 s = startswith(p, "name=");
1801 if (s)
1802 p = s;
1803
1804 if (*p == 0 || *p == '_')
1805 return false;
1806
1807 for (t = p; *t; t++)
1808 if (!strchr(CONTROLLER_VALID, *t))
1809 return false;
1810
1811 if (t - p > FILENAME_MAX)
1812 return false;
1813
1814 return true;
1815 }
1816
1817 int cg_slice_to_path(const char *unit, char **ret) {
1818 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1819 const char *dash;
1820 int r;
1821
1822 assert(unit);
1823 assert(ret);
1824
1825 if (streq(unit, "-.slice")) {
1826 char *x;
1827
1828 x = strdup("");
1829 if (!x)
1830 return -ENOMEM;
1831 *ret = x;
1832 return 0;
1833 }
1834
1835 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1836 return -EINVAL;
1837
1838 if (!endswith(unit, ".slice"))
1839 return -EINVAL;
1840
1841 r = unit_name_to_prefix(unit, &p);
1842 if (r < 0)
1843 return r;
1844
1845 dash = strchr(p, '-');
1846
1847 /* Don't allow initial dashes */
1848 if (dash == p)
1849 return -EINVAL;
1850
1851 while (dash) {
1852 _cleanup_free_ char *escaped = NULL;
1853 char n[dash - p + sizeof(".slice")];
1854
1855 /* Don't allow trailing or double dashes */
1856 if (dash[1] == 0 || dash[1] == '-')
1857 return -EINVAL;
1858
1859 strcpy(stpncpy(n, p, dash - p), ".slice");
1860 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1861 return -EINVAL;
1862
1863 escaped = cg_escape(n);
1864 if (!escaped)
1865 return -ENOMEM;
1866
1867 if (!strextend(&s, escaped, "/", NULL))
1868 return -ENOMEM;
1869
1870 dash = strchr(dash+1, '-');
1871 }
1872
1873 e = cg_escape(unit);
1874 if (!e)
1875 return -ENOMEM;
1876
1877 if (!strextend(&s, e, NULL))
1878 return -ENOMEM;
1879
1880 *ret = s;
1881 s = NULL;
1882
1883 return 0;
1884 }
1885
1886 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1887 _cleanup_free_ char *p = NULL;
1888 int r;
1889
1890 r = cg_get_path(controller, path, attribute, &p);
1891 if (r < 0)
1892 return r;
1893
1894 return write_string_file(p, value, 0);
1895 }
1896
1897 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1898 _cleanup_free_ char *p = NULL;
1899 int r;
1900
1901 r = cg_get_path(controller, path, attribute, &p);
1902 if (r < 0)
1903 return r;
1904
1905 return read_one_line_file(p, ret);
1906 }
1907
1908 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1909 CGroupController c;
1910 int r, unified;
1911
1912 /* This one will create a cgroup in our private tree, but also
1913 * duplicate it in the trees specified in mask, and remove it
1914 * in all others */
1915
1916 /* First create the cgroup in our own hierarchy. */
1917 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1918 if (r < 0)
1919 return r;
1920
1921 /* If we are in the unified hierarchy, we are done now */
1922 unified = cg_unified();
1923 if (unified < 0)
1924 return unified;
1925 if (unified > 0)
1926 return 0;
1927
1928 /* Otherwise, do the same in the other hierarchies */
1929 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1930 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1931 const char *n;
1932
1933 n = cgroup_controller_to_string(c);
1934
1935 if (mask & bit)
1936 (void) cg_create(n, path);
1937 else if (supported & bit)
1938 (void) cg_trim(n, path, true);
1939 }
1940
1941 return 0;
1942 }
1943
1944 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1945 CGroupController c;
1946 int r, unified;
1947
1948 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1949 if (r < 0)
1950 return r;
1951
1952 unified = cg_unified();
1953 if (unified < 0)
1954 return unified;
1955 if (unified > 0)
1956 return 0;
1957
1958 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1959 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1960 const char *p = NULL;
1961
1962 if (!(supported & bit))
1963 continue;
1964
1965 if (path_callback)
1966 p = path_callback(bit, userdata);
1967
1968 if (!p)
1969 p = path;
1970
1971 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1972 }
1973
1974 return 0;
1975 }
1976
1977 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1978 Iterator i;
1979 void *pidp;
1980 int r = 0;
1981
1982 SET_FOREACH(pidp, pids, i) {
1983 pid_t pid = PTR_TO_PID(pidp);
1984 int q;
1985
1986 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1987 if (q < 0 && r >= 0)
1988 r = q;
1989 }
1990
1991 return r;
1992 }
1993
1994 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1995 CGroupController c;
1996 int r = 0, unified;
1997
1998 if (!path_equal(from, to)) {
1999 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2000 if (r < 0)
2001 return r;
2002 }
2003
2004 unified = cg_unified();
2005 if (unified < 0)
2006 return unified;
2007 if (unified > 0)
2008 return r;
2009
2010 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2011 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2012 const char *p = NULL;
2013
2014 if (!(supported & bit))
2015 continue;
2016
2017 if (to_callback)
2018 p = to_callback(bit, userdata);
2019
2020 if (!p)
2021 p = to;
2022
2023 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2024 }
2025
2026 return 0;
2027 }
2028
2029 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2030 CGroupController c;
2031 int r, unified;
2032
2033 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2034 if (r < 0)
2035 return r;
2036
2037 unified = cg_unified();
2038 if (unified < 0)
2039 return unified;
2040 if (unified > 0)
2041 return r;
2042
2043 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2044 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2045
2046 if (!(supported & bit))
2047 continue;
2048
2049 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2050 }
2051
2052 return 0;
2053 }
2054
2055 int cg_mask_supported(CGroupMask *ret) {
2056 CGroupMask mask = 0;
2057 int r, unified;
2058
2059 /* Determines the mask of supported cgroup controllers. Only
2060 * includes controllers we can make sense of and that are
2061 * actually accessible. */
2062
2063 unified = cg_unified();
2064 if (unified < 0)
2065 return unified;
2066 if (unified > 0) {
2067 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2068 const char *c;
2069
2070 /* In the unified hierarchy we can read the supported
2071 * and accessible controllers from a the top-level
2072 * cgroup attribute */
2073
2074 r = cg_get_root_path(&root);
2075 if (r < 0)
2076 return r;
2077
2078 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2079 if (r < 0)
2080 return r;
2081
2082 r = read_one_line_file(path, &controllers);
2083 if (r < 0)
2084 return r;
2085
2086 c = controllers;
2087 for (;;) {
2088 _cleanup_free_ char *n = NULL;
2089 CGroupController v;
2090
2091 r = extract_first_word(&c, &n, NULL, 0);
2092 if (r < 0)
2093 return r;
2094 if (r == 0)
2095 break;
2096
2097 v = cgroup_controller_from_string(n);
2098 if (v < 0)
2099 continue;
2100
2101 mask |= CGROUP_CONTROLLER_TO_MASK(v);
2102 }
2103
2104 /* Currently, we only support the memory, io and pids
2105 * controller in the unified hierarchy, mask
2106 * everything else off. */
2107 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2108
2109 } else {
2110 CGroupController c;
2111
2112 /* In the legacy hierarchy, we check whether which
2113 * hierarchies are mounted. */
2114
2115 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2116 const char *n;
2117
2118 n = cgroup_controller_to_string(c);
2119 if (controller_is_accessible(n) >= 0)
2120 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2121 }
2122 }
2123
2124 *ret = mask;
2125 return 0;
2126 }
2127
2128 int cg_kernel_controllers(Set *controllers) {
2129 _cleanup_fclose_ FILE *f = NULL;
2130 char buf[LINE_MAX];
2131 int r;
2132
2133 assert(controllers);
2134
2135 /* Determines the full list of kernel-known controllers. Might
2136 * include controllers we don't actually support, arbitrary
2137 * named hierarchies and controllers that aren't currently
2138 * accessible (because not mounted). */
2139
2140 f = fopen("/proc/cgroups", "re");
2141 if (!f) {
2142 if (errno == ENOENT)
2143 return 0;
2144 return -errno;
2145 }
2146
2147 /* Ignore the header line */
2148 (void) fgets(buf, sizeof(buf), f);
2149
2150 for (;;) {
2151 char *controller;
2152 int enabled = 0;
2153
2154 errno = 0;
2155 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2156
2157 if (feof(f))
2158 break;
2159
2160 if (ferror(f) && errno > 0)
2161 return -errno;
2162
2163 return -EBADMSG;
2164 }
2165
2166 if (!enabled) {
2167 free(controller);
2168 continue;
2169 }
2170
2171 if (!cg_controller_is_valid(controller)) {
2172 free(controller);
2173 return -EBADMSG;
2174 }
2175
2176 r = set_consume(controllers, controller);
2177 if (r < 0)
2178 return r;
2179 }
2180
2181 return 0;
2182 }
2183
2184 static thread_local int unified_cache = -1;
2185
2186 int cg_unified(void) {
2187 struct statfs fs;
2188
2189 /* Checks if we support the unified hierarchy. Returns an
2190 * error when the cgroup hierarchies aren't mounted yet or we
2191 * have any other trouble determining if the unified hierarchy
2192 * is supported. */
2193
2194 if (unified_cache >= 0)
2195 return unified_cache;
2196
2197 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2198 return -errno;
2199
2200 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC))
2201 unified_cache = true;
2202 else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2203 unified_cache = false;
2204 else
2205 return -ENOMEDIUM;
2206
2207 return unified_cache;
2208 }
2209
2210 void cg_unified_flush(void) {
2211 unified_cache = -1;
2212 }
2213
2214 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2215 _cleanup_free_ char *fs = NULL;
2216 CGroupController c;
2217 int r, unified;
2218
2219 assert(p);
2220
2221 if (supported == 0)
2222 return 0;
2223
2224 unified = cg_unified();
2225 if (unified < 0)
2226 return unified;
2227 if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2228 return 0;
2229
2230 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2231 if (r < 0)
2232 return r;
2233
2234 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2235 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2236 const char *n;
2237
2238 if (!(supported & bit))
2239 continue;
2240
2241 n = cgroup_controller_to_string(c);
2242 {
2243 char s[1 + strlen(n) + 1];
2244
2245 s[0] = mask & bit ? '+' : '-';
2246 strcpy(s + 1, n);
2247
2248 r = write_string_file(fs, s, 0);
2249 if (r < 0)
2250 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2251 }
2252 }
2253
2254 return 0;
2255 }
2256
2257 bool cg_is_unified_wanted(void) {
2258 static thread_local int wanted = -1;
2259 int r, unified;
2260
2261 /* If the hierarchy is already mounted, then follow whatever
2262 * was chosen for it. */
2263 unified = cg_unified();
2264 if (unified >= 0)
2265 return unified;
2266
2267 /* Otherwise, let's see what the kernel command line has to
2268 * say. Since checking that is expensive, let's cache the
2269 * result. */
2270 if (wanted >= 0)
2271 return wanted;
2272
2273 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2274 if (r > 0)
2275 return (wanted = true);
2276 else {
2277 _cleanup_free_ char *value = NULL;
2278
2279 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2280 if (r < 0)
2281 return false;
2282 if (r == 0)
2283 return (wanted = false);
2284
2285 return (wanted = parse_boolean(value) > 0);
2286 }
2287 }
2288
/* The exact opposite of cg_is_unified_wanted(). */
bool cg_is_legacy_wanted(void) {
        return !cg_is_unified_wanted();
}
2292
2293 int cg_weight_parse(const char *s, uint64_t *ret) {
2294 uint64_t u;
2295 int r;
2296
2297 if (isempty(s)) {
2298 *ret = CGROUP_WEIGHT_INVALID;
2299 return 0;
2300 }
2301
2302 r = safe_atou64(s, &u);
2303 if (r < 0)
2304 return r;
2305
2306 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2307 return -ERANGE;
2308
2309 *ret = u;
2310 return 0;
2311 }
2312
2313 const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2314 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2315 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
2316 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2317 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
2318 };
2319
2320 static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2321 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2322 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
2323 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2324 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
2325 };
2326
2327 DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2328
2329 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2330 uint64_t u;
2331 int r;
2332
2333 if (isempty(s)) {
2334 *ret = CGROUP_CPU_SHARES_INVALID;
2335 return 0;
2336 }
2337
2338 r = safe_atou64(s, &u);
2339 if (r < 0)
2340 return r;
2341
2342 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2343 return -ERANGE;
2344
2345 *ret = u;
2346 return 0;
2347 }
2348
2349 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2350 uint64_t u;
2351 int r;
2352
2353 if (isempty(s)) {
2354 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2355 return 0;
2356 }
2357
2358 r = safe_atou64(s, &u);
2359 if (r < 0)
2360 return r;
2361
2362 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2363 return -ERANGE;
2364
2365 *ret = u;
2366 return 0;
2367 }
2368
2369 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2370 [CGROUP_CONTROLLER_CPU] = "cpu",
2371 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2372 [CGROUP_CONTROLLER_IO] = "io",
2373 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2374 [CGROUP_CONTROLLER_MEMORY] = "memory",
2375 [CGROUP_CONTROLLER_DEVICES] = "devices",
2376 [CGROUP_CONTROLLER_PIDS] = "pids",
2377 };
2378
2379 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);