]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/cgroup-util.c
Merge pull request #4001 from clintonroy/master
[thirdparty/systemd.git] / src / basic / cgroup-util.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <dirent.h>
21 #include <errno.h>
22 #include <ftw.h>
23 #include <limits.h>
24 #include <signal.h>
25 #include <stddef.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/stat.h>
29 #include <sys/statfs.h>
30 #include <sys/types.h>
31 #include <unistd.h>
32
33 #include "alloc-util.h"
34 #include "cgroup-util.h"
35 #include "def.h"
36 #include "dirent-util.h"
37 #include "extract-word.h"
38 #include "fd-util.h"
39 #include "fileio.h"
40 #include "formats-util.h"
41 #include "fs-util.h"
42 #include "log.h"
43 #include "login-util.h"
44 #include "macro.h"
45 #include "missing.h"
46 #include "mkdir.h"
47 #include "parse-util.h"
48 #include "path-util.h"
49 #include "proc-cmdline.h"
50 #include "process-util.h"
51 #include "set.h"
52 #include "special.h"
53 #include "stat-util.h"
54 #include "stdio-util.h"
55 #include "string-table.h"
56 #include "string-util.h"
57 #include "unit-name.h"
58 #include "user-util.h"
59
60 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
61 _cleanup_free_ char *fs = NULL;
62 FILE *f;
63 int r;
64
65 assert(_f);
66
67 r = cg_get_path(controller, path, "cgroup.procs", &fs);
68 if (r < 0)
69 return r;
70
71 f = fopen(fs, "re");
72 if (!f)
73 return -errno;
74
75 *_f = f;
76 return 0;
77 }
78
79 int cg_read_pid(FILE *f, pid_t *_pid) {
80 unsigned long ul;
81
82 /* Note that the cgroup.procs might contain duplicates! See
83 * cgroups.txt for details. */
84
85 assert(f);
86 assert(_pid);
87
88 errno = 0;
89 if (fscanf(f, "%lu", &ul) != 1) {
90
91 if (feof(f))
92 return 0;
93
94 return errno > 0 ? -errno : -EIO;
95 }
96
97 if (ul <= 0)
98 return -EIO;
99
100 *_pid = (pid_t) ul;
101 return 1;
102 }
103
104 int cg_read_event(const char *controller, const char *path, const char *event,
105 char **val)
106 {
107 _cleanup_free_ char *events = NULL, *content = NULL;
108 char *p, *line;
109 int r;
110
111 r = cg_get_path(controller, path, "cgroup.events", &events);
112 if (r < 0)
113 return r;
114
115 r = read_full_file(events, &content, NULL);
116 if (r < 0)
117 return r;
118
119 p = content;
120 while ((line = strsep(&p, "\n"))) {
121 char *key;
122
123 key = strsep(&line, " ");
124 if (!key || !line)
125 return -EINVAL;
126
127 if (strcmp(key, event))
128 continue;
129
130 *val = strdup(line);
131 return 0;
132 }
133
134 return -ENOENT;
135 }
136
/* Reports whether cgroup namespaces are available, judged by whether /proc/self/ns/cgroup can be accessed.
 * The answer is determined on the first call and cached (per thread) for all later calls. */
bool cg_ns_supported(void) {
        static thread_local int cached = -1;

        if (cached < 0)
                cached = access("/proc/self/ns/cgroup", F_OK) == 0 ? 1 : 0;

        return cached;
}
150
151 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
152 _cleanup_free_ char *fs = NULL;
153 int r;
154 DIR *d;
155
156 assert(_d);
157
158 /* This is not recursive! */
159
160 r = cg_get_path(controller, path, NULL, &fs);
161 if (r < 0)
162 return r;
163
164 d = opendir(fs);
165 if (!d)
166 return -errno;
167
168 *_d = d;
169 return 0;
170 }
171
172 int cg_read_subgroup(DIR *d, char **fn) {
173 struct dirent *de;
174
175 assert(d);
176 assert(fn);
177
178 FOREACH_DIRENT_ALL(de, d, return -errno) {
179 char *b;
180
181 if (de->d_type != DT_DIR)
182 continue;
183
184 if (streq(de->d_name, ".") ||
185 streq(de->d_name, ".."))
186 continue;
187
188 b = strdup(de->d_name);
189 if (!b)
190 return -ENOMEM;
191
192 *fn = b;
193 return 1;
194 }
195
196 return 0;
197 }
198
199 int cg_rmdir(const char *controller, const char *path) {
200 _cleanup_free_ char *p = NULL;
201 int r;
202
203 r = cg_get_path(controller, path, NULL, &p);
204 if (r < 0)
205 return r;
206
207 r = rmdir(p);
208 if (r < 0 && errno != ENOENT)
209 return -errno;
210
211 return 0;
212 }
213
214 int cg_kill(
215 const char *controller,
216 const char *path,
217 int sig,
218 CGroupFlags flags,
219 Set *s,
220 cg_kill_log_func_t log_kill,
221 void *userdata) {
222
223 _cleanup_set_free_ Set *allocated_set = NULL;
224 bool done = false;
225 int r, ret = 0;
226 pid_t my_pid;
227
228 assert(sig >= 0);
229
230 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
231 * SIGCONT on SIGKILL. */
232 if (IN_SET(sig, SIGCONT, SIGKILL))
233 flags &= ~CGROUP_SIGCONT;
234
235 /* This goes through the tasks list and kills them all. This
236 * is repeated until no further processes are added to the
237 * tasks list, to properly handle forking processes */
238
239 if (!s) {
240 s = allocated_set = set_new(NULL);
241 if (!s)
242 return -ENOMEM;
243 }
244
245 my_pid = getpid();
246
247 do {
248 _cleanup_fclose_ FILE *f = NULL;
249 pid_t pid = 0;
250 done = true;
251
252 r = cg_enumerate_processes(controller, path, &f);
253 if (r < 0) {
254 if (ret >= 0 && r != -ENOENT)
255 return r;
256
257 return ret;
258 }
259
260 while ((r = cg_read_pid(f, &pid)) > 0) {
261
262 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
263 continue;
264
265 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
266 continue;
267
268 if (log_kill)
269 log_kill(pid, sig, userdata);
270
271 /* If we haven't killed this process yet, kill
272 * it */
273 if (kill(pid, sig) < 0) {
274 if (ret >= 0 && errno != ESRCH)
275 ret = -errno;
276 } else {
277 if (flags & CGROUP_SIGCONT)
278 (void) kill(pid, SIGCONT);
279
280 if (ret == 0)
281 ret = 1;
282 }
283
284 done = false;
285
286 r = set_put(s, PID_TO_PTR(pid));
287 if (r < 0) {
288 if (ret >= 0)
289 return r;
290
291 return ret;
292 }
293 }
294
295 if (r < 0) {
296 if (ret >= 0)
297 return r;
298
299 return ret;
300 }
301
302 /* To avoid racing against processes which fork
303 * quicker than we can kill them we repeat this until
304 * no new pids need to be killed. */
305
306 } while (!done);
307
308 return ret;
309 }
310
311 int cg_kill_recursive(
312 const char *controller,
313 const char *path,
314 int sig,
315 CGroupFlags flags,
316 Set *s,
317 cg_kill_log_func_t log_kill,
318 void *userdata) {
319
320 _cleanup_set_free_ Set *allocated_set = NULL;
321 _cleanup_closedir_ DIR *d = NULL;
322 int r, ret;
323 char *fn;
324
325 assert(path);
326 assert(sig >= 0);
327
328 if (!s) {
329 s = allocated_set = set_new(NULL);
330 if (!s)
331 return -ENOMEM;
332 }
333
334 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
335
336 r = cg_enumerate_subgroups(controller, path, &d);
337 if (r < 0) {
338 if (ret >= 0 && r != -ENOENT)
339 return r;
340
341 return ret;
342 }
343
344 while ((r = cg_read_subgroup(d, &fn)) > 0) {
345 _cleanup_free_ char *p = NULL;
346
347 p = strjoin(path, "/", fn, NULL);
348 free(fn);
349 if (!p)
350 return -ENOMEM;
351
352 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
353 if (r != 0 && ret >= 0)
354 ret = r;
355 }
356 if (ret >= 0 && r < 0)
357 ret = r;
358
359 if (flags & CGROUP_REMOVE) {
360 r = cg_rmdir(controller, path);
361 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
362 return r;
363 }
364
365 return ret;
366 }
367
368 int cg_migrate(
369 const char *cfrom,
370 const char *pfrom,
371 const char *cto,
372 const char *pto,
373 CGroupFlags flags) {
374
375 bool done = false;
376 _cleanup_set_free_ Set *s = NULL;
377 int r, ret = 0;
378 pid_t my_pid;
379
380 assert(cfrom);
381 assert(pfrom);
382 assert(cto);
383 assert(pto);
384
385 s = set_new(NULL);
386 if (!s)
387 return -ENOMEM;
388
389 my_pid = getpid();
390
391 do {
392 _cleanup_fclose_ FILE *f = NULL;
393 pid_t pid = 0;
394 done = true;
395
396 r = cg_enumerate_processes(cfrom, pfrom, &f);
397 if (r < 0) {
398 if (ret >= 0 && r != -ENOENT)
399 return r;
400
401 return ret;
402 }
403
404 while ((r = cg_read_pid(f, &pid)) > 0) {
405
406 /* This might do weird stuff if we aren't a
407 * single-threaded program. However, we
408 * luckily know we are not */
409 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
410 continue;
411
412 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
413 continue;
414
415 /* Ignore kernel threads. Since they can only
416 * exist in the root cgroup, we only check for
417 * them there. */
418 if (cfrom &&
419 (isempty(pfrom) || path_equal(pfrom, "/")) &&
420 is_kernel_thread(pid) > 0)
421 continue;
422
423 r = cg_attach(cto, pto, pid);
424 if (r < 0) {
425 if (ret >= 0 && r != -ESRCH)
426 ret = r;
427 } else if (ret == 0)
428 ret = 1;
429
430 done = false;
431
432 r = set_put(s, PID_TO_PTR(pid));
433 if (r < 0) {
434 if (ret >= 0)
435 return r;
436
437 return ret;
438 }
439 }
440
441 if (r < 0) {
442 if (ret >= 0)
443 return r;
444
445 return ret;
446 }
447 } while (!done);
448
449 return ret;
450 }
451
452 int cg_migrate_recursive(
453 const char *cfrom,
454 const char *pfrom,
455 const char *cto,
456 const char *pto,
457 CGroupFlags flags) {
458
459 _cleanup_closedir_ DIR *d = NULL;
460 int r, ret = 0;
461 char *fn;
462
463 assert(cfrom);
464 assert(pfrom);
465 assert(cto);
466 assert(pto);
467
468 ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
469
470 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
471 if (r < 0) {
472 if (ret >= 0 && r != -ENOENT)
473 return r;
474
475 return ret;
476 }
477
478 while ((r = cg_read_subgroup(d, &fn)) > 0) {
479 _cleanup_free_ char *p = NULL;
480
481 p = strjoin(pfrom, "/", fn, NULL);
482 free(fn);
483 if (!p)
484 return -ENOMEM;
485
486 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
487 if (r != 0 && ret >= 0)
488 ret = r;
489 }
490
491 if (r < 0 && ret >= 0)
492 ret = r;
493
494 if (flags & CGROUP_REMOVE) {
495 r = cg_rmdir(cfrom, pfrom);
496 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
497 return r;
498 }
499
500 return ret;
501 }
502
503 int cg_migrate_recursive_fallback(
504 const char *cfrom,
505 const char *pfrom,
506 const char *cto,
507 const char *pto,
508 CGroupFlags flags) {
509
510 int r;
511
512 assert(cfrom);
513 assert(pfrom);
514 assert(cto);
515 assert(pto);
516
517 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
518 if (r < 0) {
519 char prefix[strlen(pto) + 1];
520
521 /* This didn't work? Then let's try all prefixes of the destination */
522
523 PATH_FOREACH_PREFIX(prefix, pto) {
524 int q;
525
526 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
527 if (q >= 0)
528 return q;
529 }
530 }
531
532 return r;
533 }
534
/* Converts a controller name to the directory name below /sys/fs/cgroup/ we want to mount it to.
 * Effectively, this just cuts off the "name=" prefix used for named hierarchies, if it is specified. The
 * returned pointer points into the string passed in. */
static const char *controller_to_dirname(const char *controller) {
        const char *stripped;

        assert(controller);

        stripped = startswith(controller, "name=");

        return stripped ? stripped : controller;
}
551
552 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
553 const char *dn;
554 char *t = NULL;
555
556 assert(fs);
557 assert(controller);
558
559 dn = controller_to_dirname(controller);
560
561 if (isempty(path) && isempty(suffix))
562 t = strappend("/sys/fs/cgroup/", dn);
563 else if (isempty(path))
564 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
565 else if (isempty(suffix))
566 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
567 else
568 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
569 if (!t)
570 return -ENOMEM;
571
572 *fs = t;
573 return 0;
574 }
575
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]" for the unified hierarchy layout, leaving out the components
 * that are NULL or empty. The newly allocated string is stored in *fs.
 *
 * Returns 0 on success, -ENOMEM if allocation fails. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        char *joined;
        bool no_path, no_suffix;

        assert(fs);

        no_path = isempty(path);
        no_suffix = isempty(suffix);

        if (no_path && no_suffix)
                joined = strdup("/sys/fs/cgroup");
        else if (no_path)
                joined = strappend("/sys/fs/cgroup/", suffix);
        else if (no_suffix)
                joined = strappend("/sys/fs/cgroup/", path);
        else
                joined = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
595
/* Builds the file system path for a cgroup (plus an optional attribute 'suffix') and stores the newly
 * allocated result in *fs, after passing it through path_kill_slashes().
 *
 * If no controller is specified, the path *below* the controllers is returned, without any prefix; in that
 * case at least one of 'path' and 'suffix' must be non-NULL. Otherwise the controller is validated and the
 * path is built for the unified or the legacy layout, depending on what cg_all_unified() reports.
 *
 * Returns 0 on success, -EINVAL for invalid arguments, -ENOMEM if allocation fails, or the error reported
 * by cg_all_unified(). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int unified, r;

        assert(fs);

        if (!controller) {
                char *plain;

                if (path && suffix)
                        plain = strjoin(path, "/", suffix, NULL);
                else if (path)
                        plain = strdup(path);
                else if (suffix)
                        plain = strdup(suffix);
                else
                        return -EINVAL;

                if (!plain)
                        return -ENOMEM;

                *fs = path_kill_slashes(plain);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        unified = cg_all_unified();
        if (unified < 0)
                return unified;

        r = unified > 0
                ? join_path_unified(path, suffix, fs)
                : join_path_legacy(controller, path, suffix, fs);
        if (r < 0)
                return r;

        path_kill_slashes(*fs);
        return 0;
}
640
641 static int controller_is_accessible(const char *controller) {
642 int unified;
643
644 assert(controller);
645
646 /* Checks whether a specific controller is accessible,
647 * i.e. its hierarchy mounted. In the unified hierarchy all
648 * controllers are considered accessible, except for the named
649 * hierarchies */
650
651 if (!cg_controller_is_valid(controller))
652 return -EINVAL;
653
654 unified = cg_all_unified();
655 if (unified < 0)
656 return unified;
657 if (unified > 0) {
658 /* We don't support named hierarchies if we are using
659 * the unified hierarchy. */
660
661 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
662 return 0;
663
664 if (startswith(controller, "name="))
665 return -EOPNOTSUPP;
666
667 } else {
668 const char *cc, *dn;
669
670 dn = controller_to_dirname(controller);
671 cc = strjoina("/sys/fs/cgroup/", dn);
672
673 if (laccess(cc, F_OK) < 0)
674 return -errno;
675 }
676
677 return 0;
678 }
679
/* Same as cg_get_path(), but first verifies via controller_is_accessible() that the specified controller is
 * actually accessible. Returns the error of that check if it fails, otherwise whatever cg_get_path()
 * returns. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
693
694 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
695 assert(path);
696 assert(sb);
697 assert(ftwbuf);
698
699 if (typeflag != FTW_DP)
700 return 0;
701
702 if (ftwbuf->level < 1)
703 return 0;
704
705 (void) rmdir(path);
706 return 0;
707 }
708
709 int cg_trim(const char *controller, const char *path, bool delete_root) {
710 _cleanup_free_ char *fs = NULL;
711 int r = 0;
712
713 assert(path);
714
715 r = cg_get_path(controller, path, NULL, &fs);
716 if (r < 0)
717 return r;
718
719 errno = 0;
720 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
721 if (errno == ENOENT)
722 r = 0;
723 else if (errno > 0)
724 r = -errno;
725 else
726 r = -EIO;
727 }
728
729 if (delete_root) {
730 if (rmdir(fs) < 0 && errno != ENOENT)
731 return -errno;
732 }
733
734 return r;
735 }
736
737 int cg_create(const char *controller, const char *path) {
738 _cleanup_free_ char *fs = NULL;
739 int r;
740
741 r = cg_get_path_and_check(controller, path, NULL, &fs);
742 if (r < 0)
743 return r;
744
745 r = mkdir_parents(fs, 0755);
746 if (r < 0)
747 return r;
748
749 if (mkdir(fs, 0755) < 0) {
750
751 if (errno == EEXIST)
752 return 0;
753
754 return -errno;
755 }
756
757 return 1;
758 }
759
/* Creates the specified cgroup with cg_create() and then attaches 'pid' to it with cg_attach().
 *
 * Returns the result of cg_create() if both steps succeed, otherwise the error of the step that failed.
 * This does not remove the cgroup again if attaching fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);
        if (attached < 0)
                return attached;

        return created;
}
776
777 int cg_attach(const char *controller, const char *path, pid_t pid) {
778 _cleanup_free_ char *fs = NULL;
779 char c[DECIMAL_STR_MAX(pid_t) + 2];
780 int r;
781
782 assert(path);
783 assert(pid >= 0);
784
785 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
786 if (r < 0)
787 return r;
788
789 if (pid == 0)
790 pid = getpid();
791
792 xsprintf(c, PID_FMT "\n", pid);
793
794 return write_string_file(fs, c, 0);
795 }
796
797 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
798 int r;
799
800 assert(controller);
801 assert(path);
802 assert(pid >= 0);
803
804 r = cg_attach(controller, path, pid);
805 if (r < 0) {
806 char prefix[strlen(path) + 1];
807
808 /* This didn't work? Then let's try all prefixes of
809 * the destination */
810
811 PATH_FOREACH_PREFIX(prefix, path) {
812 int q;
813
814 q = cg_attach(controller, prefix, pid);
815 if (q >= 0)
816 return q;
817 }
818 }
819
820 return r;
821 }
822
823 int cg_set_group_access(
824 const char *controller,
825 const char *path,
826 mode_t mode,
827 uid_t uid,
828 gid_t gid) {
829
830 _cleanup_free_ char *fs = NULL;
831 int r;
832
833 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
834 return 0;
835
836 if (mode != MODE_INVALID)
837 mode &= 0777;
838
839 r = cg_get_path(controller, path, NULL, &fs);
840 if (r < 0)
841 return r;
842
843 return chmod_and_chown(fs, mode, uid, gid);
844 }
845
846 int cg_set_task_access(
847 const char *controller,
848 const char *path,
849 mode_t mode,
850 uid_t uid,
851 gid_t gid) {
852
853 _cleanup_free_ char *fs = NULL, *procs = NULL;
854 int r, unified;
855
856 assert(path);
857
858 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
859 return 0;
860
861 if (mode != MODE_INVALID)
862 mode &= 0666;
863
864 r = cg_get_path(controller, path, "cgroup.procs", &fs);
865 if (r < 0)
866 return r;
867
868 r = chmod_and_chown(fs, mode, uid, gid);
869 if (r < 0)
870 return r;
871
872 unified = cg_unified(controller);
873 if (unified < 0)
874 return unified;
875 if (unified)
876 return 0;
877
878 /* Compatibility, Always keep values for "tasks" in sync with
879 * "cgroup.procs" */
880 if (cg_get_path(controller, path, "tasks", &procs) >= 0)
881 (void) chmod_and_chown(procs, mode, uid, gid);
882
883 return 0;
884 }
885
886 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
887 _cleanup_fclose_ FILE *f = NULL;
888 char line[LINE_MAX];
889 const char *fs;
890 size_t cs = 0;
891 int unified;
892
893 assert(path);
894 assert(pid >= 0);
895
896 if (controller) {
897 if (!cg_controller_is_valid(controller))
898 return -EINVAL;
899 } else
900 controller = SYSTEMD_CGROUP_CONTROLLER;
901
902 unified = cg_unified(controller);
903 if (unified < 0)
904 return unified;
905 if (unified == 0)
906 cs = strlen(controller);
907
908 fs = procfs_file_alloca(pid, "cgroup");
909 f = fopen(fs, "re");
910 if (!f)
911 return errno == ENOENT ? -ESRCH : -errno;
912
913 FOREACH_LINE(line, f, return -errno) {
914 char *e, *p;
915
916 truncate_nl(line);
917
918 if (unified) {
919 e = startswith(line, "0:");
920 if (!e)
921 continue;
922
923 e = strchr(e, ':');
924 if (!e)
925 continue;
926 } else {
927 char *l;
928 size_t k;
929 const char *word, *state;
930 bool found = false;
931
932 l = strchr(line, ':');
933 if (!l)
934 continue;
935
936 l++;
937 e = strchr(l, ':');
938 if (!e)
939 continue;
940
941 *e = 0;
942 FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
943 if (k == cs && memcmp(word, controller, cs) == 0) {
944 found = true;
945 break;
946 }
947 }
948
949 if (!found)
950 continue;
951 }
952
953 p = strdup(e + 1);
954 if (!p)
955 return -ENOMEM;
956
957 *path = p;
958 return 0;
959 }
960
961 return -ENODATA;
962 }
963
964 int cg_install_release_agent(const char *controller, const char *agent) {
965 _cleanup_free_ char *fs = NULL, *contents = NULL;
966 const char *sc;
967 int r, unified;
968
969 assert(agent);
970
971 unified = cg_unified(controller);
972 if (unified < 0)
973 return unified;
974 if (unified) /* doesn't apply to unified hierarchy */
975 return -EOPNOTSUPP;
976
977 r = cg_get_path(controller, NULL, "release_agent", &fs);
978 if (r < 0)
979 return r;
980
981 r = read_one_line_file(fs, &contents);
982 if (r < 0)
983 return r;
984
985 sc = strstrip(contents);
986 if (isempty(sc)) {
987 r = write_string_file(fs, agent, 0);
988 if (r < 0)
989 return r;
990 } else if (!path_equal(sc, agent))
991 return -EEXIST;
992
993 fs = mfree(fs);
994 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
995 if (r < 0)
996 return r;
997
998 contents = mfree(contents);
999 r = read_one_line_file(fs, &contents);
1000 if (r < 0)
1001 return r;
1002
1003 sc = strstrip(contents);
1004 if (streq(sc, "0")) {
1005 r = write_string_file(fs, "1", 0);
1006 if (r < 0)
1007 return r;
1008
1009 return 1;
1010 }
1011
1012 if (!streq(sc, "1"))
1013 return -EIO;
1014
1015 return 0;
1016 }
1017
1018 int cg_uninstall_release_agent(const char *controller) {
1019 _cleanup_free_ char *fs = NULL;
1020 int r, unified;
1021
1022 unified = cg_unified(controller);
1023 if (unified < 0)
1024 return unified;
1025 if (unified) /* Doesn't apply to unified hierarchy */
1026 return -EOPNOTSUPP;
1027
1028 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1029 if (r < 0)
1030 return r;
1031
1032 r = write_string_file(fs, "0", 0);
1033 if (r < 0)
1034 return r;
1035
1036 fs = mfree(fs);
1037
1038 r = cg_get_path(controller, NULL, "release_agent", &fs);
1039 if (r < 0)
1040 return r;
1041
1042 r = write_string_file(fs, "", 0);
1043 if (r < 0)
1044 return r;
1045
1046 return 0;
1047 }
1048
1049 int cg_is_empty(const char *controller, const char *path) {
1050 _cleanup_fclose_ FILE *f = NULL;
1051 pid_t pid;
1052 int r;
1053
1054 assert(path);
1055
1056 r = cg_enumerate_processes(controller, path, &f);
1057 if (r == -ENOENT)
1058 return 1;
1059 if (r < 0)
1060 return r;
1061
1062 r = cg_read_pid(f, &pid);
1063 if (r < 0)
1064 return r;
1065
1066 return r == 0;
1067 }
1068
1069 int cg_is_empty_recursive(const char *controller, const char *path) {
1070 int unified, r;
1071
1072 assert(path);
1073
1074 /* The root cgroup is always populated */
1075 if (controller && (isempty(path) || path_equal(path, "/")))
1076 return false;
1077
1078 unified = cg_unified(controller);
1079 if (unified < 0)
1080 return unified;
1081
1082 if (unified > 0) {
1083 _cleanup_free_ char *t = NULL;
1084
1085 /* On the unified hierarchy we can check empty state
1086 * via the "populated" attribute of "cgroup.events". */
1087
1088 r = cg_read_event(controller, path, "populated", &t);
1089 if (r < 0)
1090 return r;
1091
1092 return streq(t, "0");
1093 } else {
1094 _cleanup_closedir_ DIR *d = NULL;
1095 char *fn;
1096
1097 r = cg_is_empty(controller, path);
1098 if (r <= 0)
1099 return r;
1100
1101 r = cg_enumerate_subgroups(controller, path, &d);
1102 if (r == -ENOENT)
1103 return 1;
1104 if (r < 0)
1105 return r;
1106
1107 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1108 _cleanup_free_ char *p = NULL;
1109
1110 p = strjoin(path, "/", fn, NULL);
1111 free(fn);
1112 if (!p)
1113 return -ENOMEM;
1114
1115 r = cg_is_empty_recursive(controller, p);
1116 if (r <= 0)
1117 return r;
1118 }
1119 if (r < 0)
1120 return r;
1121
1122 return true;
1123 }
1124 }
1125
/* Splits a cgroup specification into its controller and path parts. Three forms are understood:
 *
 *   "/some/path"            -> no controller, path only
 *   "controller"            -> controller only, no path
 *   "controller:/some/path" -> both (the path may be empty, in which case it is reported as NULL)
 *
 * 'controller' and 'path' may each be NULL if the caller is not interested in that part. The strings stored
 * are newly allocated; the part that is absent from the spec is set to NULL.
 *
 * Returns 0 on success, -EINVAL if the controller name is not valid or the path is not safe or (in the
 * third form) not absolute, and -ENOMEM if allocation fails. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *ctl = NULL, *sub = NULL;
        const char *colon;
        int r;

        assert(spec);

        if (*spec == '/') {
                /* A path only */
                if (!path_is_safe(spec))
                        return -EINVAL;

                if (path) {
                        sub = strdup(spec);
                        if (!sub)
                                return -ENOMEM;

                        *path = path_kill_slashes(sub);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        colon = strchr(spec, ':');
        if (!colon) {
                /* A controller only */
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        ctl = strdup(spec);
                        if (!ctl)
                                return -ENOMEM;

                        *controller = ctl;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Both a controller and (possibly) a path */
        ctl = strndup(spec, colon - spec);
        if (!ctl)
                return -ENOMEM;

        if (!cg_controller_is_valid(ctl)) {
                r = -EINVAL;
                goto fail;
        }

        if (!isempty(colon + 1)) {
                sub = strdup(colon + 1);
                if (!sub) {
                        r = -ENOMEM;
                        goto fail;
                }

                if (!path_is_safe(sub) || !path_is_absolute(sub)) {
                        r = -EINVAL;
                        goto fail;
                }

                path_kill_slashes(sub);
        }

        if (controller)
                *controller = ctl;
        else
                free(ctl);

        if (path)
                *path = sub;
        else
                free(sub);

        return 0;

fail:
        free(ctl);
        free(sub);
        return r;
}
1208
1209 int cg_mangle_path(const char *path, char **result) {
1210 _cleanup_free_ char *c = NULL, *p = NULL;
1211 char *t;
1212 int r;
1213
1214 assert(path);
1215 assert(result);
1216
1217 /* First, check if it already is a filesystem path */
1218 if (path_startswith(path, "/sys/fs/cgroup")) {
1219
1220 t = strdup(path);
1221 if (!t)
1222 return -ENOMEM;
1223
1224 *result = path_kill_slashes(t);
1225 return 0;
1226 }
1227
1228 /* Otherwise, treat it as cg spec */
1229 r = cg_split_spec(path, &c, &p);
1230 if (r < 0)
1231 return r;
1232
1233 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1234 }
1235
1236 int cg_get_root_path(char **path) {
1237 char *p, *e;
1238 int r;
1239
1240 assert(path);
1241
1242 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1243 if (r < 0)
1244 return r;
1245
1246 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1247 if (!e)
1248 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1249 if (!e)
1250 e = endswith(p, "/system"); /* even more legacy */
1251 if (e)
1252 *e = 0;
1253
1254 *path = p;
1255 return 0;
1256 }
1257
1258 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1259 _cleanup_free_ char *rt = NULL;
1260 char *p;
1261 int r;
1262
1263 assert(cgroup);
1264 assert(shifted);
1265
1266 if (!root) {
1267 /* If the root was specified let's use that, otherwise
1268 * let's determine it from PID 1 */
1269
1270 r = cg_get_root_path(&rt);
1271 if (r < 0)
1272 return r;
1273
1274 root = rt;
1275 }
1276
1277 p = path_startswith(cgroup, root);
1278 if (p && p > cgroup)
1279 *shifted = p - 1;
1280 else
1281 *shifted = cgroup;
1282
1283 return 0;
1284 }
1285
1286 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1287 _cleanup_free_ char *raw = NULL;
1288 const char *c;
1289 int r;
1290
1291 assert(pid >= 0);
1292 assert(cgroup);
1293
1294 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1295 if (r < 0)
1296 return r;
1297
1298 r = cg_shift_path(raw, root, &c);
1299 if (r < 0)
1300 return r;
1301
1302 if (c == raw) {
1303 *cgroup = raw;
1304 raw = NULL;
1305 } else {
1306 char *n;
1307
1308 n = strdup(c);
1309 if (!n)
1310 return -ENOMEM;
1311
1312 *cgroup = n;
1313 }
1314
1315 return 0;
1316 }
1317
1318 int cg_path_decode_unit(const char *cgroup, char **unit) {
1319 char *c, *s;
1320 size_t n;
1321
1322 assert(cgroup);
1323 assert(unit);
1324
1325 n = strcspn(cgroup, "/");
1326 if (n < 3)
1327 return -ENXIO;
1328
1329 c = strndupa(cgroup, n);
1330 c = cg_unescape(c);
1331
1332 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1333 return -ENXIO;
1334
1335 s = strdup(c);
1336 if (!s)
1337 return -ENOMEM;
1338
1339 *unit = s;
1340 return 0;
1341 }
1342
1343 static bool valid_slice_name(const char *p, size_t n) {
1344
1345 if (!p)
1346 return false;
1347
1348 if (n < strlen("x.slice"))
1349 return false;
1350
1351 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1352 char buf[n+1], *c;
1353
1354 memcpy(buf, p, n);
1355 buf[n] = 0;
1356
1357 c = cg_unescape(buf);
1358
1359 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1360 }
1361
1362 return false;
1363 }
1364
/* Skips over all leading slice assignments in a cgroup path. Returns a pointer into 'p', past any leading
 * slashes, to the first path component that is not a valid slice name (or to the terminating NUL). */
static const char *skip_slices(const char *p) {
        size_t len;

        assert(p);

        for (;;) {
                p += strspn(p, "/");

                len = strcspn(p, "/");
                if (valid_slice_name(p, len)) {
                        p += len;
                        continue;
                }

                return p;
        }
}
1382
/* Determines the unit a cgroup path belongs to: skips all leading slices and decodes the component that
 * follows as unit name. A newly allocated copy of the name is stored in *ret.
 *
 * Returns 0 on success, -ENXIO if that component is no valid unit name or is itself a slice, and otherwise
 * the error of cg_path_decode_unit(). */
int cg_path_get_unit(const char *path, char **ret) {
        char *name;
        int r;

        assert(path);
        assert(ret);

        r = cg_path_decode_unit(skip_slices(path), &name);
        if (r < 0)
                return r;

        /* We skipped over the slices, don't accept any now */
        if (!endswith(name, ".slice")) {
                *ret = name;
                return 0;
        }

        free(name);
        return -ENXIO;
}
1406
1407 int cg_pid_get_unit(pid_t pid, char **unit) {
1408 _cleanup_free_ char *cgroup = NULL;
1409 int r;
1410
1411 assert(unit);
1412
1413 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1414 if (r < 0)
1415 return r;
1416
1417 return cg_path_get_unit(cgroup, unit);
1418 }
1419
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer into 'p' right after the "session-<id>.scope" component and any slashes following it,
 * or NULL if 'p' is NULL or empty, if its first component is not of that form, or if <id> is not a valid
 * session ID.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the ID between the 8 character prefix and the 6 character suffix, plus NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure with NULL rather than "false" */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1456
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer into 'p' right after the "user@<uid>.service" component and any slashes following it,
 * or NULL if 'p' is NULL or empty, if its first component is not of that form, or if <uid> is not accepted
 * by parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;
        else {
                /* What lies between the 5 character prefix and the 8 character suffix is the UID */
                size_t uid_len = len - 5 - 8;
                char uid_str[uid_len + 1];

                memcpy(uid_str, p + 5, uid_len);
                uid_str[uid_len] = 0;

                /* Note that user manager services never need unescaping, since they cannot conflict with
                 * the kernel's own names, hence we don't need to call cg_unescape() here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;

                p += len;
                p += strspn(p, "/");

                return p;
        }
}
1494
/* Skips everything in a cgroup path that precedes a user unit: first any slices, then either the user
 * manager service or, failing that, the session scope. Returns a pointer into 'path' past that prefix, or
 * NULL if neither of the two follows the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Skip the user manager, if it's in the path now... */
        after_manager = skip_user_manager(after_slices);

        /* ...alternatively skip the user session if it is in the path */
        return after_manager ? after_manager : skip_session(after_slices);
}
1511
/* Determines the user unit a cgroup path belongs to. The user prefix (slices plus user manager service or
 * session scope) is skipped first; -ENXIO is returned if it is not there. From there on the path looks
 * pretty much the same as for a system unit, hence the remainder is handed to cg_path_get_unit(), whose
 * result is returned. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1527
1528 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1529 _cleanup_free_ char *cgroup = NULL;
1530 int r;
1531
1532 assert(unit);
1533
1534 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1535 if (r < 0)
1536 return r;
1537
1538 return cg_path_get_user_unit(cgroup, unit);
1539 }
1540
1541 int cg_path_get_machine_name(const char *path, char **machine) {
1542 _cleanup_free_ char *u = NULL;
1543 const char *sl;
1544 int r;
1545
1546 r = cg_path_get_unit(path, &u);
1547 if (r < 0)
1548 return r;
1549
1550 sl = strjoina("/run/systemd/machines/unit:", u);
1551 return readlink_malloc(sl, machine);
1552 }
1553
1554 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1555 _cleanup_free_ char *cgroup = NULL;
1556 int r;
1557
1558 assert(machine);
1559
1560 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1561 if (r < 0)
1562 return r;
1563
1564 return cg_path_get_machine_name(cgroup, machine);
1565 }
1566
1567 int cg_path_get_session(const char *path, char **session) {
1568 _cleanup_free_ char *unit = NULL;
1569 char *start, *end;
1570 int r;
1571
1572 assert(path);
1573
1574 r = cg_path_get_unit(path, &unit);
1575 if (r < 0)
1576 return r;
1577
1578 start = startswith(unit, "session-");
1579 if (!start)
1580 return -ENXIO;
1581 end = endswith(start, ".scope");
1582 if (!end)
1583 return -ENXIO;
1584
1585 *end = 0;
1586 if (!session_id_valid(start))
1587 return -ENXIO;
1588
1589 if (session) {
1590 char *rr;
1591
1592 rr = strdup(start);
1593 if (!rr)
1594 return -ENOMEM;
1595
1596 *session = rr;
1597 }
1598
1599 return 0;
1600 }
1601
1602 int cg_pid_get_session(pid_t pid, char **session) {
1603 _cleanup_free_ char *cgroup = NULL;
1604 int r;
1605
1606 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1607 if (r < 0)
1608 return r;
1609
1610 return cg_path_get_session(cgroup, session);
1611 }
1612
1613 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1614 _cleanup_free_ char *slice = NULL;
1615 char *start, *end;
1616 int r;
1617
1618 assert(path);
1619
1620 r = cg_path_get_slice(path, &slice);
1621 if (r < 0)
1622 return r;
1623
1624 start = startswith(slice, "user-");
1625 if (!start)
1626 return -ENXIO;
1627 end = endswith(start, ".slice");
1628 if (!end)
1629 return -ENXIO;
1630
1631 *end = 0;
1632 if (parse_uid(start, uid) < 0)
1633 return -ENXIO;
1634
1635 return 0;
1636 }
1637
1638 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1639 _cleanup_free_ char *cgroup = NULL;
1640 int r;
1641
1642 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1643 if (r < 0)
1644 return r;
1645
1646 return cg_path_get_owner_uid(cgroup, uid);
1647 }
1648
/* Finds the right-most slice unit counting from the beginning of the path,
 * stopping at the first component that is not a valid slice name.
 *
 * If the path does not start with any slice, a newly allocated "-.slice" is
 * returned. Otherwise the last slice component found is decoded with
 * cg_path_decode_unit().
 *
 * Returns 0 on success for the "-.slice" case, -ENOMEM on allocation failure,
 * or whatever cg_path_decode_unit() returns. */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last_slice = NULL;
        char *root;

        assert(p);
        assert(slice);

        for (;;) {
                size_t len;

                p += strspn(p, "/");
                len = strcspn(p, "/");

                if (!valid_slice_name(p, len))
                        break;

                last_slice = p;
                p += len;
        }

        if (last_slice)
                return cg_path_decode_unit(last_slice, slice);

        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1684
1685 int cg_pid_get_slice(pid_t pid, char **slice) {
1686 _cleanup_free_ char *cgroup = NULL;
1687 int r;
1688
1689 assert(slice);
1690
1691 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1692 if (r < 0)
1693 return r;
1694
1695 return cg_path_get_slice(cgroup, slice);
1696 }
1697
/* Extracts the user slice from a cgroup path: the user prefix is skipped via
 * skip_user_prefix() and the remainder is handed to cg_path_get_slice().
 * Returns -ENXIO if the prefix cannot be skipped. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *remainder;

        assert(p);
        assert(slice);

        remainder = skip_user_prefix(p);
        if (remainder)
                /* From here on the path is parsed exactly like a system slice path. */
                return cg_path_get_slice(remainder, slice);

        return -ENXIO;
}
1711
1712 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1713 _cleanup_free_ char *cgroup = NULL;
1714 int r;
1715
1716 assert(slice);
1717
1718 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1719 if (r < 0)
1720 return r;
1721
1722 return cg_path_get_user_slice(cgroup, slice);
1723 }
1724
1725 char *cg_escape(const char *p) {
1726 bool need_prefix = false;
1727
1728 /* This implements very minimal escaping for names to be used
1729 * as file names in the cgroup tree: any name which might
1730 * conflict with a kernel name or is prefixed with '_' is
1731 * prefixed with a '_'. That way, when reading cgroup names it
1732 * is sufficient to remove a single prefixing underscore if
1733 * there is one. */
1734
1735 /* The return value of this function (unlike cg_unescape())
1736 * needs free()! */
1737
1738 if (p[0] == 0 ||
1739 p[0] == '_' ||
1740 p[0] == '.' ||
1741 streq(p, "notify_on_release") ||
1742 streq(p, "release_agent") ||
1743 streq(p, "tasks") ||
1744 startswith(p, "cgroup."))
1745 need_prefix = true;
1746 else {
1747 const char *dot;
1748
1749 dot = strrchr(p, '.');
1750 if (dot) {
1751 CGroupController c;
1752 size_t l = dot - p;
1753
1754 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1755 const char *n;
1756
1757 n = cgroup_controller_to_string(c);
1758
1759 if (l != strlen(n))
1760 continue;
1761
1762 if (memcmp(p, n, l) != 0)
1763 continue;
1764
1765 need_prefix = true;
1766 break;
1767 }
1768 }
1769 }
1770
1771 if (need_prefix)
1772 return strappend("_", p);
1773
1774 return strdup(p);
1775 }
1776
/* Reverses cg_escape(): skips a single leading underscore, if there is one.
 *
 * The return value of this function (unlike cg_escape()) doesn't need free()!
 * It points into 'p' itself. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1788
1789 #define CONTROLLER_VALID \
1790 DIGITS LETTERS \
1791 "_"
1792
1793 bool cg_controller_is_valid(const char *p) {
1794 const char *t, *s;
1795
1796 if (!p)
1797 return false;
1798
1799 s = startswith(p, "name=");
1800 if (s)
1801 p = s;
1802
1803 if (*p == 0 || *p == '_')
1804 return false;
1805
1806 for (t = p; *t; t++)
1807 if (!strchr(CONTROLLER_VALID, *t))
1808 return false;
1809
1810 if (t - p > FILENAME_MAX)
1811 return false;
1812
1813 return true;
1814 }
1815
1816 int cg_slice_to_path(const char *unit, char **ret) {
1817 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1818 const char *dash;
1819 int r;
1820
1821 assert(unit);
1822 assert(ret);
1823
1824 if (streq(unit, "-.slice")) {
1825 char *x;
1826
1827 x = strdup("");
1828 if (!x)
1829 return -ENOMEM;
1830 *ret = x;
1831 return 0;
1832 }
1833
1834 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1835 return -EINVAL;
1836
1837 if (!endswith(unit, ".slice"))
1838 return -EINVAL;
1839
1840 r = unit_name_to_prefix(unit, &p);
1841 if (r < 0)
1842 return r;
1843
1844 dash = strchr(p, '-');
1845
1846 /* Don't allow initial dashes */
1847 if (dash == p)
1848 return -EINVAL;
1849
1850 while (dash) {
1851 _cleanup_free_ char *escaped = NULL;
1852 char n[dash - p + sizeof(".slice")];
1853
1854 /* Don't allow trailing or double dashes */
1855 if (dash[1] == 0 || dash[1] == '-')
1856 return -EINVAL;
1857
1858 strcpy(stpncpy(n, p, dash - p), ".slice");
1859 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1860 return -EINVAL;
1861
1862 escaped = cg_escape(n);
1863 if (!escaped)
1864 return -ENOMEM;
1865
1866 if (!strextend(&s, escaped, "/", NULL))
1867 return -ENOMEM;
1868
1869 dash = strchr(dash+1, '-');
1870 }
1871
1872 e = cg_escape(unit);
1873 if (!e)
1874 return -ENOMEM;
1875
1876 if (!strextend(&s, e, NULL))
1877 return -ENOMEM;
1878
1879 *ret = s;
1880 s = NULL;
1881
1882 return 0;
1883 }
1884
1885 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1886 _cleanup_free_ char *p = NULL;
1887 int r;
1888
1889 r = cg_get_path(controller, path, attribute, &p);
1890 if (r < 0)
1891 return r;
1892
1893 return write_string_file(p, value, 0);
1894 }
1895
1896 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1897 _cleanup_free_ char *p = NULL;
1898 int r;
1899
1900 r = cg_get_path(controller, path, attribute, &p);
1901 if (r < 0)
1902 return r;
1903
1904 return read_one_line_file(p, ret);
1905 }
1906
1907 int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
1908 _cleanup_free_ char *filename = NULL, *content = NULL;
1909 char *line, *p;
1910 int i, r;
1911
1912 for (i = 0; keys[i]; i++)
1913 values[i] = NULL;
1914
1915 r = cg_get_path(controller, path, attribute, &filename);
1916 if (r < 0)
1917 return r;
1918
1919 r = read_full_file(filename, &content, NULL);
1920 if (r < 0)
1921 return r;
1922
1923 p = content;
1924 while ((line = strsep(&p, "\n"))) {
1925 char *key;
1926
1927 key = strsep(&line, " ");
1928
1929 for (i = 0; keys[i]; i++) {
1930 if (streq(key, keys[i])) {
1931 values[i] = strdup(line);
1932 break;
1933 }
1934 }
1935 }
1936
1937 for (i = 0; keys[i]; i++) {
1938 if (!values[i]) {
1939 for (i = 0; keys[i]; i++) {
1940 free(values[i]);
1941 values[i] = NULL;
1942 }
1943 return -ENOENT;
1944 }
1945 }
1946
1947 return 0;
1948 }
1949
1950 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1951 CGroupController c;
1952 int r, unified;
1953
1954 /* This one will create a cgroup in our private tree, but also
1955 * duplicate it in the trees specified in mask, and remove it
1956 * in all others */
1957
1958 /* First create the cgroup in our own hierarchy. */
1959 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1960 if (r < 0)
1961 return r;
1962
1963 /* If we are in the unified hierarchy, we are done now */
1964 unified = cg_all_unified();
1965 if (unified < 0)
1966 return unified;
1967 if (unified > 0)
1968 return 0;
1969
1970 /* Otherwise, do the same in the other hierarchies */
1971 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1972 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1973 const char *n;
1974
1975 n = cgroup_controller_to_string(c);
1976
1977 if (mask & bit)
1978 (void) cg_create(n, path);
1979 else if (supported & bit)
1980 (void) cg_trim(n, path, true);
1981 }
1982
1983 return 0;
1984 }
1985
1986 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1987 CGroupController c;
1988 int r, unified;
1989
1990 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1991 if (r < 0)
1992 return r;
1993
1994 unified = cg_all_unified();
1995 if (unified < 0)
1996 return unified;
1997 if (unified > 0)
1998 return 0;
1999
2000 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2001 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2002 const char *p = NULL;
2003
2004 if (!(supported & bit))
2005 continue;
2006
2007 if (path_callback)
2008 p = path_callback(bit, userdata);
2009
2010 if (!p)
2011 p = path;
2012
2013 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2014 }
2015
2016 return 0;
2017 }
2018
2019 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2020 Iterator i;
2021 void *pidp;
2022 int r = 0;
2023
2024 SET_FOREACH(pidp, pids, i) {
2025 pid_t pid = PTR_TO_PID(pidp);
2026 int q;
2027
2028 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2029 if (q < 0 && r >= 0)
2030 r = q;
2031 }
2032
2033 return r;
2034 }
2035
2036 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2037 CGroupController c;
2038 int r = 0, unified;
2039
2040 if (!path_equal(from, to)) {
2041 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2042 if (r < 0)
2043 return r;
2044 }
2045
2046 unified = cg_all_unified();
2047 if (unified < 0)
2048 return unified;
2049 if (unified > 0)
2050 return r;
2051
2052 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2053 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2054 const char *p = NULL;
2055
2056 if (!(supported & bit))
2057 continue;
2058
2059 if (to_callback)
2060 p = to_callback(bit, userdata);
2061
2062 if (!p)
2063 p = to;
2064
2065 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2066 }
2067
2068 return 0;
2069 }
2070
2071 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2072 CGroupController c;
2073 int r, unified;
2074
2075 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2076 if (r < 0)
2077 return r;
2078
2079 unified = cg_all_unified();
2080 if (unified < 0)
2081 return unified;
2082 if (unified > 0)
2083 return r;
2084
2085 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2086 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2087
2088 if (!(supported & bit))
2089 continue;
2090
2091 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2092 }
2093
2094 return 0;
2095 }
2096
2097 int cg_mask_supported(CGroupMask *ret) {
2098 CGroupMask mask = 0;
2099 int r, unified;
2100
2101 /* Determines the mask of supported cgroup controllers. Only
2102 * includes controllers we can make sense of and that are
2103 * actually accessible. */
2104
2105 unified = cg_all_unified();
2106 if (unified < 0)
2107 return unified;
2108 if (unified > 0) {
2109 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2110 const char *c;
2111
2112 /* In the unified hierarchy we can read the supported
2113 * and accessible controllers from a the top-level
2114 * cgroup attribute */
2115
2116 r = cg_get_root_path(&root);
2117 if (r < 0)
2118 return r;
2119
2120 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2121 if (r < 0)
2122 return r;
2123
2124 r = read_one_line_file(path, &controllers);
2125 if (r < 0)
2126 return r;
2127
2128 c = controllers;
2129 for (;;) {
2130 _cleanup_free_ char *n = NULL;
2131 CGroupController v;
2132
2133 r = extract_first_word(&c, &n, NULL, 0);
2134 if (r < 0)
2135 return r;
2136 if (r == 0)
2137 break;
2138
2139 v = cgroup_controller_from_string(n);
2140 if (v < 0)
2141 continue;
2142
2143 mask |= CGROUP_CONTROLLER_TO_MASK(v);
2144 }
2145
2146 /* Currently, we support the cpu, memory, io and pids
2147 * controller in the unified hierarchy, mask
2148 * everything else off. */
2149 mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2150
2151 } else {
2152 CGroupController c;
2153
2154 /* In the legacy hierarchy, we check whether which
2155 * hierarchies are mounted. */
2156
2157 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2158 const char *n;
2159
2160 n = cgroup_controller_to_string(c);
2161 if (controller_is_accessible(n) >= 0)
2162 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2163 }
2164 }
2165
2166 *ret = mask;
2167 return 0;
2168 }
2169
2170 int cg_kernel_controllers(Set *controllers) {
2171 _cleanup_fclose_ FILE *f = NULL;
2172 char buf[LINE_MAX];
2173 int r;
2174
2175 assert(controllers);
2176
2177 /* Determines the full list of kernel-known controllers. Might
2178 * include controllers we don't actually support, arbitrary
2179 * named hierarchies and controllers that aren't currently
2180 * accessible (because not mounted). */
2181
2182 f = fopen("/proc/cgroups", "re");
2183 if (!f) {
2184 if (errno == ENOENT)
2185 return 0;
2186 return -errno;
2187 }
2188
2189 /* Ignore the header line */
2190 (void) fgets(buf, sizeof(buf), f);
2191
2192 for (;;) {
2193 char *controller;
2194 int enabled = 0;
2195
2196 errno = 0;
2197 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2198
2199 if (feof(f))
2200 break;
2201
2202 if (ferror(f) && errno > 0)
2203 return -errno;
2204
2205 return -EBADMSG;
2206 }
2207
2208 if (!enabled) {
2209 free(controller);
2210 continue;
2211 }
2212
2213 if (!cg_controller_is_valid(controller)) {
2214 free(controller);
2215 return -EBADMSG;
2216 }
2217
2218 r = set_consume(controllers, controller);
2219 if (r < 0)
2220 return r;
2221 }
2222
2223 return 0;
2224 }
2225
2226 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2227
2228 static int cg_update_unified(void) {
2229
2230 struct statfs fs;
2231
2232 /* Checks if we support the unified hierarchy. Returns an
2233 * error when the cgroup hierarchies aren't mounted yet or we
2234 * have any other trouble determining if the unified hierarchy
2235 * is supported. */
2236
2237 if (unified_cache >= CGROUP_UNIFIED_NONE)
2238 return 0;
2239
2240 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2241 return -errno;
2242
2243 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC))
2244 unified_cache = CGROUP_UNIFIED_ALL;
2245 else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2246 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
2247 return -errno;
2248
2249 unified_cache = F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC) ?
2250 CGROUP_UNIFIED_SYSTEMD : CGROUP_UNIFIED_NONE;
2251 } else
2252 return -ENOMEDIUM;
2253
2254 return 0;
2255 }
2256
2257 int cg_unified(const char *controller) {
2258
2259 int r;
2260
2261 r = cg_update_unified();
2262 if (r < 0)
2263 return r;
2264
2265 if (streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER))
2266 return unified_cache >= CGROUP_UNIFIED_SYSTEMD;
2267 else
2268 return unified_cache >= CGROUP_UNIFIED_ALL;
2269 }
2270
/* Shortcut for cg_unified(NULL): > 0 if all hierarchies are unified, 0 if
 * not, negative on error. */
int cg_all_unified(void) {
        return cg_unified(NULL);
}
2275
2276 void cg_unified_flush(void) {
2277 unified_cache = CGROUP_UNIFIED_UNKNOWN;
2278 }
2279
2280 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2281 _cleanup_free_ char *fs = NULL;
2282 CGroupController c;
2283 int r, unified;
2284
2285 assert(p);
2286
2287 if (supported == 0)
2288 return 0;
2289
2290 unified = cg_all_unified();
2291 if (unified < 0)
2292 return unified;
2293 if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2294 return 0;
2295
2296 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2297 if (r < 0)
2298 return r;
2299
2300 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2301 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2302 const char *n;
2303
2304 if (!(supported & bit))
2305 continue;
2306
2307 n = cgroup_controller_to_string(c);
2308 {
2309 char s[1 + strlen(n) + 1];
2310
2311 s[0] = mask & bit ? '+' : '-';
2312 strcpy(s + 1, n);
2313
2314 r = write_string_file(fs, s, 0);
2315 if (r < 0)
2316 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2317 }
2318 }
2319
2320 return 0;
2321 }
2322
2323 bool cg_is_unified_wanted(void) {
2324 static thread_local int wanted = -1;
2325 int r, unified;
2326
2327 /* If the hierarchy is already mounted, then follow whatever
2328 * was chosen for it. */
2329 unified = cg_all_unified();
2330 if (unified >= 0)
2331 return unified;
2332
2333 /* Otherwise, let's see what the kernel command line has to
2334 * say. Since checking that is expensive, let's cache the
2335 * result. */
2336 if (wanted >= 0)
2337 return wanted;
2338
2339 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2340 if (r > 0)
2341 return (wanted = true);
2342 else {
2343 _cleanup_free_ char *value = NULL;
2344
2345 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2346 if (r < 0)
2347 return false;
2348 if (r == 0)
2349 return (wanted = false);
2350
2351 return (wanted = parse_boolean(value) > 0);
2352 }
2353 }
2354
/* The legacy hierarchy is wanted exactly when the unified one is not. */
bool cg_is_legacy_wanted(void) {
        return !cg_is_unified_wanted();
}
2358
2359 bool cg_is_unified_systemd_controller_wanted(void) {
2360 static thread_local int wanted = -1;
2361 int r, unified;
2362
2363 /* If the unified hierarchy is requested in full, no need to
2364 * bother with this. */
2365 if (cg_is_unified_wanted())
2366 return 0;
2367
2368 /* If the hierarchy is already mounted, then follow whatever
2369 * was chosen for it. */
2370 unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
2371 if (unified >= 0)
2372 return unified;
2373
2374 /* Otherwise, let's see what the kernel command line has to
2375 * say. Since checking that is expensive, let's cache the
2376 * result. */
2377 if (wanted >= 0)
2378 return wanted;
2379
2380 r = get_proc_cmdline_key("systemd.legacy_systemd_cgroup_controller", NULL);
2381 if (r > 0)
2382 wanted = false;
2383 else {
2384 _cleanup_free_ char *value = NULL;
2385
2386 r = get_proc_cmdline_key("systemd.legacy_systemd_cgroup_controller=", &value);
2387 if (r < 0)
2388 return true;
2389
2390 if (r == 0)
2391 wanted = true;
2392 else
2393 wanted = parse_boolean(value) <= 0;
2394 }
2395
2396 return wanted;
2397 }
2398
/* Our own hierarchy shall be a legacy one if the legacy setup is wanted in
 * general and a unified systemd hierarchy has not been asked for. */
bool cg_is_legacy_systemd_controller_wanted(void) {
        if (!cg_is_legacy_wanted())
                return false;

        return !cg_is_unified_systemd_controller_wanted();
}
2402
2403 int cg_weight_parse(const char *s, uint64_t *ret) {
2404 uint64_t u;
2405 int r;
2406
2407 if (isempty(s)) {
2408 *ret = CGROUP_WEIGHT_INVALID;
2409 return 0;
2410 }
2411
2412 r = safe_atou64(s, &u);
2413 if (r < 0)
2414 return r;
2415
2416 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2417 return -ERANGE;
2418
2419 *ret = u;
2420 return 0;
2421 }
2422
2423 const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2424 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2425 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
2426 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2427 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
2428 };
2429
2430 static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2431 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2432 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
2433 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2434 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
2435 };
2436
2437 DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2438
2439 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2440 uint64_t u;
2441 int r;
2442
2443 if (isempty(s)) {
2444 *ret = CGROUP_CPU_SHARES_INVALID;
2445 return 0;
2446 }
2447
2448 r = safe_atou64(s, &u);
2449 if (r < 0)
2450 return r;
2451
2452 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2453 return -ERANGE;
2454
2455 *ret = u;
2456 return 0;
2457 }
2458
2459 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2460 uint64_t u;
2461 int r;
2462
2463 if (isempty(s)) {
2464 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2465 return 0;
2466 }
2467
2468 r = safe_atou64(s, &u);
2469 if (r < 0)
2470 return r;
2471
2472 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2473 return -ERANGE;
2474
2475 *ret = u;
2476 return 0;
2477 }
2478
2479 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2480 [CGROUP_CONTROLLER_CPU] = "cpu",
2481 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2482 [CGROUP_CONTROLLER_IO] = "io",
2483 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2484 [CGROUP_CONTROLLER_MEMORY] = "memory",
2485 [CGROUP_CONTROLLER_DEVICES] = "devices",
2486 [CGROUP_CONTROLLER_PIDS] = "pids",
2487 };
2488
2489 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);