]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/cgroup-util.c
core: add "invocation ID" concept to service manager
[thirdparty/systemd.git] / src / basic / cgroup-util.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <dirent.h>
21 #include <errno.h>
22 #include <ftw.h>
23 #include <limits.h>
24 #include <signal.h>
25 #include <stddef.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/stat.h>
29 #include <sys/statfs.h>
30 #include <sys/types.h>
31 #include <sys/xattr.h>
32 #include <unistd.h>
33
34 #include "alloc-util.h"
35 #include "cgroup-util.h"
36 #include "def.h"
37 #include "dirent-util.h"
38 #include "extract-word.h"
39 #include "fd-util.h"
40 #include "fileio.h"
41 #include "formats-util.h"
42 #include "fs-util.h"
43 #include "log.h"
44 #include "login-util.h"
45 #include "macro.h"
46 #include "missing.h"
47 #include "mkdir.h"
48 #include "parse-util.h"
49 #include "path-util.h"
50 #include "proc-cmdline.h"
51 #include "process-util.h"
52 #include "set.h"
53 #include "special.h"
54 #include "stat-util.h"
55 #include "stdio-util.h"
56 #include "string-table.h"
57 #include "string-util.h"
58 #include "unit-name.h"
59 #include "user-util.h"
60
61 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
62 _cleanup_free_ char *fs = NULL;
63 FILE *f;
64 int r;
65
66 assert(_f);
67
68 r = cg_get_path(controller, path, "cgroup.procs", &fs);
69 if (r < 0)
70 return r;
71
72 f = fopen(fs, "re");
73 if (!f)
74 return -errno;
75
76 *_f = f;
77 return 0;
78 }
79
80 int cg_read_pid(FILE *f, pid_t *_pid) {
81 unsigned long ul;
82
83 /* Note that the cgroup.procs might contain duplicates! See
84 * cgroups.txt for details. */
85
86 assert(f);
87 assert(_pid);
88
89 errno = 0;
90 if (fscanf(f, "%lu", &ul) != 1) {
91
92 if (feof(f))
93 return 0;
94
95 return errno > 0 ? -errno : -EIO;
96 }
97
98 if (ul <= 0)
99 return -EIO;
100
101 *_pid = (pid_t) ul;
102 return 1;
103 }
104
105 int cg_read_event(const char *controller, const char *path, const char *event,
106 char **val)
107 {
108 _cleanup_free_ char *events = NULL, *content = NULL;
109 char *p, *line;
110 int r;
111
112 r = cg_get_path(controller, path, "cgroup.events", &events);
113 if (r < 0)
114 return r;
115
116 r = read_full_file(events, &content, NULL);
117 if (r < 0)
118 return r;
119
120 p = content;
121 while ((line = strsep(&p, "\n"))) {
122 char *key;
123
124 key = strsep(&line, " ");
125 if (!key || !line)
126 return -EINVAL;
127
128 if (strcmp(key, event))
129 continue;
130
131 *val = strdup(line);
132 return 0;
133 }
134
135 return -ENOENT;
136 }
137
/* Reports whether "/proc/self/ns/cgroup" exists. The answer is probed
 * once with access() and then cached in a thread-local variable, so each
 * thread performs the check at most once. */
bool cg_ns_supported(void) {
        static thread_local int cached = -1;

        if (cached < 0)
                cached = access("/proc/self/ns/cgroup", F_OK) == 0 ? 1 : 0;

        return cached;
}
151
152 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
153 _cleanup_free_ char *fs = NULL;
154 int r;
155 DIR *d;
156
157 assert(_d);
158
159 /* This is not recursive! */
160
161 r = cg_get_path(controller, path, NULL, &fs);
162 if (r < 0)
163 return r;
164
165 d = opendir(fs);
166 if (!d)
167 return -errno;
168
169 *_d = d;
170 return 0;
171 }
172
173 int cg_read_subgroup(DIR *d, char **fn) {
174 struct dirent *de;
175
176 assert(d);
177 assert(fn);
178
179 FOREACH_DIRENT_ALL(de, d, return -errno) {
180 char *b;
181
182 if (de->d_type != DT_DIR)
183 continue;
184
185 if (streq(de->d_name, ".") ||
186 streq(de->d_name, ".."))
187 continue;
188
189 b = strdup(de->d_name);
190 if (!b)
191 return -ENOMEM;
192
193 *fn = b;
194 return 1;
195 }
196
197 return 0;
198 }
199
200 int cg_rmdir(const char *controller, const char *path) {
201 _cleanup_free_ char *p = NULL;
202 int r;
203
204 r = cg_get_path(controller, path, NULL, &p);
205 if (r < 0)
206 return r;
207
208 r = rmdir(p);
209 if (r < 0 && errno != ENOENT)
210 return -errno;
211
212 return 0;
213 }
214
215 int cg_kill(
216 const char *controller,
217 const char *path,
218 int sig,
219 CGroupFlags flags,
220 Set *s,
221 cg_kill_log_func_t log_kill,
222 void *userdata) {
223
224 _cleanup_set_free_ Set *allocated_set = NULL;
225 bool done = false;
226 int r, ret = 0;
227 pid_t my_pid;
228
229 assert(sig >= 0);
230
231 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
232 * SIGCONT on SIGKILL. */
233 if (IN_SET(sig, SIGCONT, SIGKILL))
234 flags &= ~CGROUP_SIGCONT;
235
236 /* This goes through the tasks list and kills them all. This
237 * is repeated until no further processes are added to the
238 * tasks list, to properly handle forking processes */
239
240 if (!s) {
241 s = allocated_set = set_new(NULL);
242 if (!s)
243 return -ENOMEM;
244 }
245
246 my_pid = getpid();
247
248 do {
249 _cleanup_fclose_ FILE *f = NULL;
250 pid_t pid = 0;
251 done = true;
252
253 r = cg_enumerate_processes(controller, path, &f);
254 if (r < 0) {
255 if (ret >= 0 && r != -ENOENT)
256 return r;
257
258 return ret;
259 }
260
261 while ((r = cg_read_pid(f, &pid)) > 0) {
262
263 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
264 continue;
265
266 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
267 continue;
268
269 if (log_kill)
270 log_kill(pid, sig, userdata);
271
272 /* If we haven't killed this process yet, kill
273 * it */
274 if (kill(pid, sig) < 0) {
275 if (ret >= 0 && errno != ESRCH)
276 ret = -errno;
277 } else {
278 if (flags & CGROUP_SIGCONT)
279 (void) kill(pid, SIGCONT);
280
281 if (ret == 0)
282 ret = 1;
283 }
284
285 done = false;
286
287 r = set_put(s, PID_TO_PTR(pid));
288 if (r < 0) {
289 if (ret >= 0)
290 return r;
291
292 return ret;
293 }
294 }
295
296 if (r < 0) {
297 if (ret >= 0)
298 return r;
299
300 return ret;
301 }
302
303 /* To avoid racing against processes which fork
304 * quicker than we can kill them we repeat this until
305 * no new pids need to be killed. */
306
307 } while (!done);
308
309 return ret;
310 }
311
312 int cg_kill_recursive(
313 const char *controller,
314 const char *path,
315 int sig,
316 CGroupFlags flags,
317 Set *s,
318 cg_kill_log_func_t log_kill,
319 void *userdata) {
320
321 _cleanup_set_free_ Set *allocated_set = NULL;
322 _cleanup_closedir_ DIR *d = NULL;
323 int r, ret;
324 char *fn;
325
326 assert(path);
327 assert(sig >= 0);
328
329 if (!s) {
330 s = allocated_set = set_new(NULL);
331 if (!s)
332 return -ENOMEM;
333 }
334
335 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
336
337 r = cg_enumerate_subgroups(controller, path, &d);
338 if (r < 0) {
339 if (ret >= 0 && r != -ENOENT)
340 return r;
341
342 return ret;
343 }
344
345 while ((r = cg_read_subgroup(d, &fn)) > 0) {
346 _cleanup_free_ char *p = NULL;
347
348 p = strjoin(path, "/", fn, NULL);
349 free(fn);
350 if (!p)
351 return -ENOMEM;
352
353 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
354 if (r != 0 && ret >= 0)
355 ret = r;
356 }
357 if (ret >= 0 && r < 0)
358 ret = r;
359
360 if (flags & CGROUP_REMOVE) {
361 r = cg_rmdir(controller, path);
362 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
363 return r;
364 }
365
366 return ret;
367 }
368
369 int cg_migrate(
370 const char *cfrom,
371 const char *pfrom,
372 const char *cto,
373 const char *pto,
374 CGroupFlags flags) {
375
376 bool done = false;
377 _cleanup_set_free_ Set *s = NULL;
378 int r, ret = 0;
379 pid_t my_pid;
380
381 assert(cfrom);
382 assert(pfrom);
383 assert(cto);
384 assert(pto);
385
386 s = set_new(NULL);
387 if (!s)
388 return -ENOMEM;
389
390 my_pid = getpid();
391
392 do {
393 _cleanup_fclose_ FILE *f = NULL;
394 pid_t pid = 0;
395 done = true;
396
397 r = cg_enumerate_processes(cfrom, pfrom, &f);
398 if (r < 0) {
399 if (ret >= 0 && r != -ENOENT)
400 return r;
401
402 return ret;
403 }
404
405 while ((r = cg_read_pid(f, &pid)) > 0) {
406
407 /* This might do weird stuff if we aren't a
408 * single-threaded program. However, we
409 * luckily know we are not */
410 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
411 continue;
412
413 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
414 continue;
415
416 /* Ignore kernel threads. Since they can only
417 * exist in the root cgroup, we only check for
418 * them there. */
419 if (cfrom &&
420 (isempty(pfrom) || path_equal(pfrom, "/")) &&
421 is_kernel_thread(pid) > 0)
422 continue;
423
424 r = cg_attach(cto, pto, pid);
425 if (r < 0) {
426 if (ret >= 0 && r != -ESRCH)
427 ret = r;
428 } else if (ret == 0)
429 ret = 1;
430
431 done = false;
432
433 r = set_put(s, PID_TO_PTR(pid));
434 if (r < 0) {
435 if (ret >= 0)
436 return r;
437
438 return ret;
439 }
440 }
441
442 if (r < 0) {
443 if (ret >= 0)
444 return r;
445
446 return ret;
447 }
448 } while (!done);
449
450 return ret;
451 }
452
453 int cg_migrate_recursive(
454 const char *cfrom,
455 const char *pfrom,
456 const char *cto,
457 const char *pto,
458 CGroupFlags flags) {
459
460 _cleanup_closedir_ DIR *d = NULL;
461 int r, ret = 0;
462 char *fn;
463
464 assert(cfrom);
465 assert(pfrom);
466 assert(cto);
467 assert(pto);
468
469 ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
470
471 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
472 if (r < 0) {
473 if (ret >= 0 && r != -ENOENT)
474 return r;
475
476 return ret;
477 }
478
479 while ((r = cg_read_subgroup(d, &fn)) > 0) {
480 _cleanup_free_ char *p = NULL;
481
482 p = strjoin(pfrom, "/", fn, NULL);
483 free(fn);
484 if (!p)
485 return -ENOMEM;
486
487 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
488 if (r != 0 && ret >= 0)
489 ret = r;
490 }
491
492 if (r < 0 && ret >= 0)
493 ret = r;
494
495 if (flags & CGROUP_REMOVE) {
496 r = cg_rmdir(cfrom, pfrom);
497 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
498 return r;
499 }
500
501 return ret;
502 }
503
504 int cg_migrate_recursive_fallback(
505 const char *cfrom,
506 const char *pfrom,
507 const char *cto,
508 const char *pto,
509 CGroupFlags flags) {
510
511 int r;
512
513 assert(cfrom);
514 assert(pfrom);
515 assert(cto);
516 assert(pto);
517
518 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
519 if (r < 0) {
520 char prefix[strlen(pto) + 1];
521
522 /* This didn't work? Then let's try all prefixes of the destination */
523
524 PATH_FOREACH_PREFIX(prefix, pto) {
525 int q;
526
527 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
528 if (q >= 0)
529 return q;
530 }
531 }
532
533 return r;
534 }
535
/* Converts a controller name to the directory name below /sys/fs/cgroup/
 * that its hierarchy is mounted to. Effectively this just strips the
 * "name=" prefix used for named hierarchies, if present. The returned
 * pointer points into 'controller'; nothing is allocated. */
static const char *controller_to_dirname(const char *controller) {
        const char *stripped;

        assert(controller);

        stripped = startswith(controller, "name=");

        return stripped ? stripped : controller;
}
552
553 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
554 const char *dn;
555 char *t = NULL;
556
557 assert(fs);
558 assert(controller);
559
560 dn = controller_to_dirname(controller);
561
562 if (isempty(path) && isempty(suffix))
563 t = strappend("/sys/fs/cgroup/", dn);
564 else if (isempty(path))
565 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
566 else if (isempty(suffix))
567 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
568 else
569 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
570 if (!t)
571 return -ENOMEM;
572
573 *fs = t;
574 return 0;
575 }
576
/* Builds the file system path for a cgroup on the unified hierarchy:
 * "/sys/fs/cgroup[/<path>][/<suffix>]". Empty or NULL components are left
 * out.
 *
 * Stores the newly allocated string in *fs and returns 0, or returns
 * -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        char *joined;
        bool no_path, no_suffix;

        assert(fs);

        no_path = isempty(path);
        no_suffix = isempty(suffix);

        if (no_path)
                joined = no_suffix ?
                        strdup("/sys/fs/cgroup") :
                        strappend("/sys/fs/cgroup/", suffix);
        else
                joined = no_suffix ?
                        strappend("/sys/fs/cgroup/", path) :
                        strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
596
/* Computes the file system path of a cgroup, optionally with an attribute
 * file name ('suffix') appended.
 *
 * If 'controller' is NULL the result is just path and suffix joined,
 * i.e. the path *below* the controller mount point without any prefix;
 * at least one of 'path' and 'suffix' must then be non-NULL. Otherwise
 * the controller name is validated and the path is built for either the
 * unified or the legacy hierarchy, depending on cg_all_unified().
 *
 * The result has redundant slashes removed and is stored, newly
 * allocated, in *fs. Returns 0 on success, -EINVAL for invalid
 * arguments, -ENOMEM, or an error from cg_all_unified(). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int unified, r;

        assert(fs);

        if (!controller) {
                char *joined;

                if (path && suffix)
                        joined = strjoin(path, "/", suffix, NULL);
                else if (path)
                        joined = strdup(path);
                else if (suffix)
                        joined = strdup(suffix);
                else
                        return -EINVAL;

                if (!joined)
                        return -ENOMEM;

                *fs = path_kill_slashes(joined);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        unified = cg_all_unified();
        if (unified < 0)
                return unified;

        r = unified > 0 ?
                join_path_unified(path, suffix, fs) :
                join_path_legacy(controller, path, suffix, fs);
        if (r < 0)
                return r;

        path_kill_slashes(*fs);
        return 0;
}
641
642 static int controller_is_accessible(const char *controller) {
643 int unified;
644
645 assert(controller);
646
647 /* Checks whether a specific controller is accessible,
648 * i.e. its hierarchy mounted. In the unified hierarchy all
649 * controllers are considered accessible, except for the named
650 * hierarchies */
651
652 if (!cg_controller_is_valid(controller))
653 return -EINVAL;
654
655 unified = cg_all_unified();
656 if (unified < 0)
657 return unified;
658 if (unified > 0) {
659 /* We don't support named hierarchies if we are using
660 * the unified hierarchy. */
661
662 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
663 return 0;
664
665 if (startswith(controller, "name="))
666 return -EOPNOTSUPP;
667
668 } else {
669 const char *cc, *dn;
670
671 dn = controller_to_dirname(controller);
672 cc = strjoina("/sys/fs/cgroup/", dn);
673
674 if (laccess(cc, F_OK) < 0)
675 return -errno;
676 }
677
678 return 0;
679 }
680
/* Same as cg_get_path(), but first verifies via
 * controller_is_accessible() that the controller can actually be used,
 * and fails with that function's error if it cannot. 'controller' must
 * not be NULL here. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
694
695 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
696 assert(path);
697 assert(sb);
698 assert(ftwbuf);
699
700 if (typeflag != FTW_DP)
701 return 0;
702
703 if (ftwbuf->level < 1)
704 return 0;
705
706 (void) rmdir(path);
707 return 0;
708 }
709
710 int cg_trim(const char *controller, const char *path, bool delete_root) {
711 _cleanup_free_ char *fs = NULL;
712 int r = 0;
713
714 assert(path);
715
716 r = cg_get_path(controller, path, NULL, &fs);
717 if (r < 0)
718 return r;
719
720 errno = 0;
721 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
722 if (errno == ENOENT)
723 r = 0;
724 else if (errno > 0)
725 r = -errno;
726 else
727 r = -EIO;
728 }
729
730 if (delete_root) {
731 if (rmdir(fs) < 0 && errno != ENOENT)
732 return -errno;
733 }
734
735 return r;
736 }
737
738 int cg_create(const char *controller, const char *path) {
739 _cleanup_free_ char *fs = NULL;
740 int r;
741
742 r = cg_get_path_and_check(controller, path, NULL, &fs);
743 if (r < 0)
744 return r;
745
746 r = mkdir_parents(fs, 0755);
747 if (r < 0)
748 return r;
749
750 if (mkdir(fs, 0755) < 0) {
751
752 if (errno == EEXIST)
753 return 0;
754
755 return -errno;
756 }
757
758 return 1;
759 }
760
/* Creates the cgroup (see cg_create()) and attaches 'pid' to it (see
 * cg_attach()). Returns the result of cg_create() on success, i.e. 1 if
 * the group was newly created and 0 if it existed before, or a negative
 * errno-style value if either step fails. The cgroup is not removed again
 * if attaching fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);
        if (attached < 0)
                return attached;

        /* This does not remove the cgroup on failure */
        return created;
}
777
778 int cg_attach(const char *controller, const char *path, pid_t pid) {
779 _cleanup_free_ char *fs = NULL;
780 char c[DECIMAL_STR_MAX(pid_t) + 2];
781 int r;
782
783 assert(path);
784 assert(pid >= 0);
785
786 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
787 if (r < 0)
788 return r;
789
790 if (pid == 0)
791 pid = getpid();
792
793 xsprintf(c, PID_FMT "\n", pid);
794
795 return write_string_file(fs, c, 0);
796 }
797
798 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
799 int r;
800
801 assert(controller);
802 assert(path);
803 assert(pid >= 0);
804
805 r = cg_attach(controller, path, pid);
806 if (r < 0) {
807 char prefix[strlen(path) + 1];
808
809 /* This didn't work? Then let's try all prefixes of
810 * the destination */
811
812 PATH_FOREACH_PREFIX(prefix, path) {
813 int q;
814
815 q = cg_attach(controller, prefix, pid);
816 if (q >= 0)
817 return q;
818 }
819 }
820
821 return r;
822 }
823
824 int cg_set_group_access(
825 const char *controller,
826 const char *path,
827 mode_t mode,
828 uid_t uid,
829 gid_t gid) {
830
831 _cleanup_free_ char *fs = NULL;
832 int r;
833
834 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
835 return 0;
836
837 if (mode != MODE_INVALID)
838 mode &= 0777;
839
840 r = cg_get_path(controller, path, NULL, &fs);
841 if (r < 0)
842 return r;
843
844 return chmod_and_chown(fs, mode, uid, gid);
845 }
846
847 int cg_set_task_access(
848 const char *controller,
849 const char *path,
850 mode_t mode,
851 uid_t uid,
852 gid_t gid) {
853
854 _cleanup_free_ char *fs = NULL, *procs = NULL;
855 int r, unified;
856
857 assert(path);
858
859 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
860 return 0;
861
862 if (mode != MODE_INVALID)
863 mode &= 0666;
864
865 r = cg_get_path(controller, path, "cgroup.procs", &fs);
866 if (r < 0)
867 return r;
868
869 r = chmod_and_chown(fs, mode, uid, gid);
870 if (r < 0)
871 return r;
872
873 unified = cg_unified(controller);
874 if (unified < 0)
875 return unified;
876 if (unified)
877 return 0;
878
879 /* Compatibility, Always keep values for "tasks" in sync with
880 * "cgroup.procs" */
881 if (cg_get_path(controller, path, "tasks", &procs) >= 0)
882 (void) chmod_and_chown(procs, mode, uid, gid);
883
884 return 0;
885 }
886
887 int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
888 _cleanup_free_ char *fs = NULL;
889 int r;
890
891 assert(path);
892 assert(name);
893 assert(value || size <= 0);
894
895 r = cg_get_path(controller, path, NULL, &fs);
896 if (r < 0)
897 return r;
898
899 if (setxattr(fs, name, value, size, flags) < 0)
900 return -errno;
901
902 return 0;
903 }
904
905 int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
906 _cleanup_free_ char *fs = NULL;
907 ssize_t n;
908 int r;
909
910 assert(path);
911 assert(name);
912
913 r = cg_get_path(controller, path, NULL, &fs);
914 if (r < 0)
915 return r;
916
917 n = getxattr(fs, name, value, size);
918 if (n < 0)
919 return -errno;
920
921 return (int) n;
922 }
923
924 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
925 _cleanup_fclose_ FILE *f = NULL;
926 char line[LINE_MAX];
927 const char *fs;
928 size_t cs = 0;
929 int unified;
930
931 assert(path);
932 assert(pid >= 0);
933
934 if (controller) {
935 if (!cg_controller_is_valid(controller))
936 return -EINVAL;
937 } else
938 controller = SYSTEMD_CGROUP_CONTROLLER;
939
940 unified = cg_unified(controller);
941 if (unified < 0)
942 return unified;
943 if (unified == 0)
944 cs = strlen(controller);
945
946 fs = procfs_file_alloca(pid, "cgroup");
947 f = fopen(fs, "re");
948 if (!f)
949 return errno == ENOENT ? -ESRCH : -errno;
950
951 FOREACH_LINE(line, f, return -errno) {
952 char *e, *p;
953
954 truncate_nl(line);
955
956 if (unified) {
957 e = startswith(line, "0:");
958 if (!e)
959 continue;
960
961 e = strchr(e, ':');
962 if (!e)
963 continue;
964 } else {
965 char *l;
966 size_t k;
967 const char *word, *state;
968 bool found = false;
969
970 l = strchr(line, ':');
971 if (!l)
972 continue;
973
974 l++;
975 e = strchr(l, ':');
976 if (!e)
977 continue;
978
979 *e = 0;
980 FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
981 if (k == cs && memcmp(word, controller, cs) == 0) {
982 found = true;
983 break;
984 }
985 }
986
987 if (!found)
988 continue;
989 }
990
991 p = strdup(e + 1);
992 if (!p)
993 return -ENOMEM;
994
995 *path = p;
996 return 0;
997 }
998
999 return -ENODATA;
1000 }
1001
1002 int cg_install_release_agent(const char *controller, const char *agent) {
1003 _cleanup_free_ char *fs = NULL, *contents = NULL;
1004 const char *sc;
1005 int r, unified;
1006
1007 assert(agent);
1008
1009 unified = cg_unified(controller);
1010 if (unified < 0)
1011 return unified;
1012 if (unified) /* doesn't apply to unified hierarchy */
1013 return -EOPNOTSUPP;
1014
1015 r = cg_get_path(controller, NULL, "release_agent", &fs);
1016 if (r < 0)
1017 return r;
1018
1019 r = read_one_line_file(fs, &contents);
1020 if (r < 0)
1021 return r;
1022
1023 sc = strstrip(contents);
1024 if (isempty(sc)) {
1025 r = write_string_file(fs, agent, 0);
1026 if (r < 0)
1027 return r;
1028 } else if (!path_equal(sc, agent))
1029 return -EEXIST;
1030
1031 fs = mfree(fs);
1032 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1033 if (r < 0)
1034 return r;
1035
1036 contents = mfree(contents);
1037 r = read_one_line_file(fs, &contents);
1038 if (r < 0)
1039 return r;
1040
1041 sc = strstrip(contents);
1042 if (streq(sc, "0")) {
1043 r = write_string_file(fs, "1", 0);
1044 if (r < 0)
1045 return r;
1046
1047 return 1;
1048 }
1049
1050 if (!streq(sc, "1"))
1051 return -EIO;
1052
1053 return 0;
1054 }
1055
1056 int cg_uninstall_release_agent(const char *controller) {
1057 _cleanup_free_ char *fs = NULL;
1058 int r, unified;
1059
1060 unified = cg_unified(controller);
1061 if (unified < 0)
1062 return unified;
1063 if (unified) /* Doesn't apply to unified hierarchy */
1064 return -EOPNOTSUPP;
1065
1066 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1067 if (r < 0)
1068 return r;
1069
1070 r = write_string_file(fs, "0", 0);
1071 if (r < 0)
1072 return r;
1073
1074 fs = mfree(fs);
1075
1076 r = cg_get_path(controller, NULL, "release_agent", &fs);
1077 if (r < 0)
1078 return r;
1079
1080 r = write_string_file(fs, "", 0);
1081 if (r < 0)
1082 return r;
1083
1084 return 0;
1085 }
1086
1087 int cg_is_empty(const char *controller, const char *path) {
1088 _cleanup_fclose_ FILE *f = NULL;
1089 pid_t pid;
1090 int r;
1091
1092 assert(path);
1093
1094 r = cg_enumerate_processes(controller, path, &f);
1095 if (r == -ENOENT)
1096 return 1;
1097 if (r < 0)
1098 return r;
1099
1100 r = cg_read_pid(f, &pid);
1101 if (r < 0)
1102 return r;
1103
1104 return r == 0;
1105 }
1106
1107 int cg_is_empty_recursive(const char *controller, const char *path) {
1108 int unified, r;
1109
1110 assert(path);
1111
1112 /* The root cgroup is always populated */
1113 if (controller && (isempty(path) || path_equal(path, "/")))
1114 return false;
1115
1116 unified = cg_unified(controller);
1117 if (unified < 0)
1118 return unified;
1119
1120 if (unified > 0) {
1121 _cleanup_free_ char *t = NULL;
1122
1123 /* On the unified hierarchy we can check empty state
1124 * via the "populated" attribute of "cgroup.events". */
1125
1126 r = cg_read_event(controller, path, "populated", &t);
1127 if (r < 0)
1128 return r;
1129
1130 return streq(t, "0");
1131 } else {
1132 _cleanup_closedir_ DIR *d = NULL;
1133 char *fn;
1134
1135 r = cg_is_empty(controller, path);
1136 if (r <= 0)
1137 return r;
1138
1139 r = cg_enumerate_subgroups(controller, path, &d);
1140 if (r == -ENOENT)
1141 return 1;
1142 if (r < 0)
1143 return r;
1144
1145 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1146 _cleanup_free_ char *p = NULL;
1147
1148 p = strjoin(path, "/", fn, NULL);
1149 free(fn);
1150 if (!p)
1151 return -ENOMEM;
1152
1153 r = cg_is_empty_recursive(controller, p);
1154 if (r <= 0)
1155 return r;
1156 }
1157 if (r < 0)
1158 return r;
1159
1160 return true;
1161 }
1162 }
1163
1164 int cg_split_spec(const char *spec, char **controller, char **path) {
1165 char *t = NULL, *u = NULL;
1166 const char *e;
1167
1168 assert(spec);
1169
1170 if (*spec == '/') {
1171 if (!path_is_safe(spec))
1172 return -EINVAL;
1173
1174 if (path) {
1175 t = strdup(spec);
1176 if (!t)
1177 return -ENOMEM;
1178
1179 *path = path_kill_slashes(t);
1180 }
1181
1182 if (controller)
1183 *controller = NULL;
1184
1185 return 0;
1186 }
1187
1188 e = strchr(spec, ':');
1189 if (!e) {
1190 if (!cg_controller_is_valid(spec))
1191 return -EINVAL;
1192
1193 if (controller) {
1194 t = strdup(spec);
1195 if (!t)
1196 return -ENOMEM;
1197
1198 *controller = t;
1199 }
1200
1201 if (path)
1202 *path = NULL;
1203
1204 return 0;
1205 }
1206
1207 t = strndup(spec, e-spec);
1208 if (!t)
1209 return -ENOMEM;
1210 if (!cg_controller_is_valid(t)) {
1211 free(t);
1212 return -EINVAL;
1213 }
1214
1215 if (isempty(e+1))
1216 u = NULL;
1217 else {
1218 u = strdup(e+1);
1219 if (!u) {
1220 free(t);
1221 return -ENOMEM;
1222 }
1223
1224 if (!path_is_safe(u) ||
1225 !path_is_absolute(u)) {
1226 free(t);
1227 free(u);
1228 return -EINVAL;
1229 }
1230
1231 path_kill_slashes(u);
1232 }
1233
1234 if (controller)
1235 *controller = t;
1236 else
1237 free(t);
1238
1239 if (path)
1240 *path = u;
1241 else
1242 free(u);
1243
1244 return 0;
1245 }
1246
1247 int cg_mangle_path(const char *path, char **result) {
1248 _cleanup_free_ char *c = NULL, *p = NULL;
1249 char *t;
1250 int r;
1251
1252 assert(path);
1253 assert(result);
1254
1255 /* First, check if it already is a filesystem path */
1256 if (path_startswith(path, "/sys/fs/cgroup")) {
1257
1258 t = strdup(path);
1259 if (!t)
1260 return -ENOMEM;
1261
1262 *result = path_kill_slashes(t);
1263 return 0;
1264 }
1265
1266 /* Otherwise, treat it as cg spec */
1267 r = cg_split_spec(path, &c, &p);
1268 if (r < 0)
1269 return r;
1270
1271 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1272 }
1273
1274 int cg_get_root_path(char **path) {
1275 char *p, *e;
1276 int r;
1277
1278 assert(path);
1279
1280 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1281 if (r < 0)
1282 return r;
1283
1284 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1285 if (!e)
1286 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1287 if (!e)
1288 e = endswith(p, "/system"); /* even more legacy */
1289 if (e)
1290 *e = 0;
1291
1292 *path = p;
1293 return 0;
1294 }
1295
1296 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1297 _cleanup_free_ char *rt = NULL;
1298 char *p;
1299 int r;
1300
1301 assert(cgroup);
1302 assert(shifted);
1303
1304 if (!root) {
1305 /* If the root was specified let's use that, otherwise
1306 * let's determine it from PID 1 */
1307
1308 r = cg_get_root_path(&rt);
1309 if (r < 0)
1310 return r;
1311
1312 root = rt;
1313 }
1314
1315 p = path_startswith(cgroup, root);
1316 if (p && p > cgroup)
1317 *shifted = p - 1;
1318 else
1319 *shifted = cgroup;
1320
1321 return 0;
1322 }
1323
1324 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1325 _cleanup_free_ char *raw = NULL;
1326 const char *c;
1327 int r;
1328
1329 assert(pid >= 0);
1330 assert(cgroup);
1331
1332 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1333 if (r < 0)
1334 return r;
1335
1336 r = cg_shift_path(raw, root, &c);
1337 if (r < 0)
1338 return r;
1339
1340 if (c == raw) {
1341 *cgroup = raw;
1342 raw = NULL;
1343 } else {
1344 char *n;
1345
1346 n = strdup(c);
1347 if (!n)
1348 return -ENOMEM;
1349
1350 *cgroup = n;
1351 }
1352
1353 return 0;
1354 }
1355
1356 int cg_path_decode_unit(const char *cgroup, char **unit) {
1357 char *c, *s;
1358 size_t n;
1359
1360 assert(cgroup);
1361 assert(unit);
1362
1363 n = strcspn(cgroup, "/");
1364 if (n < 3)
1365 return -ENXIO;
1366
1367 c = strndupa(cgroup, n);
1368 c = cg_unescape(c);
1369
1370 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1371 return -ENXIO;
1372
1373 s = strdup(c);
1374 if (!s)
1375 return -ENOMEM;
1376
1377 *unit = s;
1378 return 0;
1379 }
1380
1381 static bool valid_slice_name(const char *p, size_t n) {
1382
1383 if (!p)
1384 return false;
1385
1386 if (n < strlen("x.slice"))
1387 return false;
1388
1389 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1390 char buf[n+1], *c;
1391
1392 memcpy(buf, p, n);
1393 buf[n] = 0;
1394
1395 c = cg_unescape(buf);
1396
1397 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1398 }
1399
1400 return false;
1401 }
1402
/* Skips over all leading slice components of a cgroup path. Returns a
 * pointer into 'p' at the first component that is not a valid slice
 * name, with leading slashes of that component already skipped. */
static const char *skip_slices(const char *p) {
        size_t len;

        assert(p);

        p += strspn(p, "/");

        while (len = strcspn(p, "/"), valid_slice_name(p, len)) {
                /* Jump over the slice and the slashes that follow it */
                p += len;
                p += strspn(p, "/");
        }

        return p;
}
1420
/* Determines the unit a cgroup path belongs to: all leading slice
 * components are skipped and the component that follows is decoded as a
 * unit name (see cg_path_decode_unit()).
 *
 * Stores the newly allocated unit name in *ret and returns 0. Returns
 * -ENXIO if no unit name can be extracted or if the decoded name ends in
 * ".slice", or another negative errno-style value. */
int cg_path_get_unit(const char *path, char **ret) {
        char *name;
        int r;

        assert(path);
        assert(ret);

        r = cg_path_decode_unit(skip_slices(path), &name);
        if (r < 0)
                return r;

        /* We skipped over the slices, don't accept any now */
        if (endswith(name, ".slice")) {
                free(name);
                return -ENXIO;
        }

        *ret = name;
        return 0;
}
1444
1445 int cg_pid_get_unit(pid_t pid, char **unit) {
1446 _cleanup_free_ char *cgroup = NULL;
1447 int r;
1448
1449 assert(unit);
1450
1451 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1452 if (r < 0)
1453 return r;
1454
1455 return cg_path_get_unit(cgroup, unit);
1456 }
1457
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer into 'p' right after the "session-<id>.scope"
 * component and any slashes following it, or NULL if 'p' is empty, its
 * first component is not of that form, or <id> is not a valid session
 * identifier.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Holds just the <id> part between prefix and suffix */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal
                 * failure with NULL rather than the boolean false */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1494
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer into 'p' right after the "user@<uid>.service"
 * component and any slashes following it, or NULL if 'p' is empty, its
 * first component is not of that form, or <uid> is not accepted by
 * parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;

        /* Holds just the <uid> part between prefix and suffix */
        char uid_str[len - 5 - 8 + 1];

        memcpy(uid_str, p + 5, len - 5 - 8);
        uid_str[len - 5 - 8] = 0;

        /* Note that user manager services never need unescaping, since
         * they cannot conflict with the kernel's own names, hence we
         * don't need to call cg_unescape() here. */

        if (parse_uid(uid_str, NULL) < 0)
                return NULL;

        p += len;
        p += strspn(p, "/");

        return p;
}
1532
/* Skips everything in a cgroup path that precedes a user unit: first any
 * slices, then either a "user@<uid>.service" component or, failing
 * that, a "session-<id>.scope" component. Returns a pointer into 'path'
 * behind the skipped part, or NULL if neither of the two is present. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Skip the user manager, if it's in the path now... */
        after_manager = skip_user_manager(after_slices);

        /* ... or alternatively the user session, if that is in the
         * path */
        return after_manager ? after_manager : skip_session(after_slices);
}
1549
/* Extracts the user unit from a cgroup path: the user prefix is skipped
 * first, then the remainder is handed to cg_path_get_unit(). Returns -ENXIO
 * if the path carries no user prefix. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* From here on it looks pretty much the same as for a system
         * unit, hence the same parser is used. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1565
1566 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1567 _cleanup_free_ char *cgroup = NULL;
1568 int r;
1569
1570 assert(unit);
1571
1572 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1573 if (r < 0)
1574 return r;
1575
1576 return cg_path_get_user_unit(cgroup, unit);
1577 }
1578
1579 int cg_path_get_machine_name(const char *path, char **machine) {
1580 _cleanup_free_ char *u = NULL;
1581 const char *sl;
1582 int r;
1583
1584 r = cg_path_get_unit(path, &u);
1585 if (r < 0)
1586 return r;
1587
1588 sl = strjoina("/run/systemd/machines/unit:", u);
1589 return readlink_malloc(sl, machine);
1590 }
1591
1592 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1593 _cleanup_free_ char *cgroup = NULL;
1594 int r;
1595
1596 assert(machine);
1597
1598 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1599 if (r < 0)
1600 return r;
1601
1602 return cg_path_get_machine_name(cgroup, machine);
1603 }
1604
1605 int cg_path_get_session(const char *path, char **session) {
1606 _cleanup_free_ char *unit = NULL;
1607 char *start, *end;
1608 int r;
1609
1610 assert(path);
1611
1612 r = cg_path_get_unit(path, &unit);
1613 if (r < 0)
1614 return r;
1615
1616 start = startswith(unit, "session-");
1617 if (!start)
1618 return -ENXIO;
1619 end = endswith(start, ".scope");
1620 if (!end)
1621 return -ENXIO;
1622
1623 *end = 0;
1624 if (!session_id_valid(start))
1625 return -ENXIO;
1626
1627 if (session) {
1628 char *rr;
1629
1630 rr = strdup(start);
1631 if (!rr)
1632 return -ENOMEM;
1633
1634 *session = rr;
1635 }
1636
1637 return 0;
1638 }
1639
1640 int cg_pid_get_session(pid_t pid, char **session) {
1641 _cleanup_free_ char *cgroup = NULL;
1642 int r;
1643
1644 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1645 if (r < 0)
1646 return r;
1647
1648 return cg_path_get_session(cgroup, session);
1649 }
1650
1651 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1652 _cleanup_free_ char *slice = NULL;
1653 char *start, *end;
1654 int r;
1655
1656 assert(path);
1657
1658 r = cg_path_get_slice(path, &slice);
1659 if (r < 0)
1660 return r;
1661
1662 start = startswith(slice, "user-");
1663 if (!start)
1664 return -ENXIO;
1665 end = endswith(start, ".slice");
1666 if (!end)
1667 return -ENXIO;
1668
1669 *end = 0;
1670 if (parse_uid(start, uid) < 0)
1671 return -ENXIO;
1672
1673 return 0;
1674 }
1675
1676 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1677 _cleanup_free_ char *cgroup = NULL;
1678 int r;
1679
1680 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1681 if (r < 0)
1682 return r;
1683
1684 return cg_path_get_owner_uid(cgroup, uid);
1685 }
1686
1687 int cg_path_get_slice(const char *p, char **slice) {
1688 const char *e = NULL;
1689
1690 assert(p);
1691 assert(slice);
1692
1693 /* Finds the right-most slice unit from the beginning, but
1694 * stops before we come to the first non-slice unit. */
1695
1696 for (;;) {
1697 size_t n;
1698
1699 p += strspn(p, "/");
1700
1701 n = strcspn(p, "/");
1702 if (!valid_slice_name(p, n)) {
1703
1704 if (!e) {
1705 char *s;
1706
1707 s = strdup(SPECIAL_ROOT_SLICE);
1708 if (!s)
1709 return -ENOMEM;
1710
1711 *slice = s;
1712 return 0;
1713 }
1714
1715 return cg_path_decode_unit(e, slice);
1716 }
1717
1718 e = p;
1719 p += n;
1720 }
1721 }
1722
1723 int cg_pid_get_slice(pid_t pid, char **slice) {
1724 _cleanup_free_ char *cgroup = NULL;
1725 int r;
1726
1727 assert(slice);
1728
1729 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1730 if (r < 0)
1731 return r;
1732
1733 return cg_path_get_slice(cgroup, slice);
1734 }
1735
/* Extracts the user slice from a cgroup path: the user prefix is skipped
 * first, then the remainder is handed to cg_path_get_slice(). Returns
 * -ENXIO if the path carries no user prefix. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* From here on it looks pretty much the same as for a system
         * slice, so the same parser is used. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1749
1750 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1751 _cleanup_free_ char *cgroup = NULL;
1752 int r;
1753
1754 assert(slice);
1755
1756 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1757 if (r < 0)
1758 return r;
1759
1760 return cg_path_get_user_slice(cgroup, slice);
1761 }
1762
1763 char *cg_escape(const char *p) {
1764 bool need_prefix = false;
1765
1766 /* This implements very minimal escaping for names to be used
1767 * as file names in the cgroup tree: any name which might
1768 * conflict with a kernel name or is prefixed with '_' is
1769 * prefixed with a '_'. That way, when reading cgroup names it
1770 * is sufficient to remove a single prefixing underscore if
1771 * there is one. */
1772
1773 /* The return value of this function (unlike cg_unescape())
1774 * needs free()! */
1775
1776 if (p[0] == 0 ||
1777 p[0] == '_' ||
1778 p[0] == '.' ||
1779 streq(p, "notify_on_release") ||
1780 streq(p, "release_agent") ||
1781 streq(p, "tasks") ||
1782 startswith(p, "cgroup."))
1783 need_prefix = true;
1784 else {
1785 const char *dot;
1786
1787 dot = strrchr(p, '.');
1788 if (dot) {
1789 CGroupController c;
1790 size_t l = dot - p;
1791
1792 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1793 const char *n;
1794
1795 n = cgroup_controller_to_string(c);
1796
1797 if (l != strlen(n))
1798 continue;
1799
1800 if (memcmp(p, n, l) != 0)
1801 continue;
1802
1803 need_prefix = true;
1804 break;
1805 }
1806 }
1807 }
1808
1809 if (need_prefix)
1810 return strappend("_", p);
1811
1812 return strdup(p);
1813 }
1814
/* Undoes cg_escape(): if the name starts with '_' a pointer past that
 * single underscore is returned, otherwise the name itself.
 *
 * The return value of this function (unlike cg_escape()) doesn't need
 * free(), as it points into the string passed in! */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1826
1827 #define CONTROLLER_VALID \
1828 DIGITS LETTERS \
1829 "_"
1830
1831 bool cg_controller_is_valid(const char *p) {
1832 const char *t, *s;
1833
1834 if (!p)
1835 return false;
1836
1837 s = startswith(p, "name=");
1838 if (s)
1839 p = s;
1840
1841 if (*p == 0 || *p == '_')
1842 return false;
1843
1844 for (t = p; *t; t++)
1845 if (!strchr(CONTROLLER_VALID, *t))
1846 return false;
1847
1848 if (t - p > FILENAME_MAX)
1849 return false;
1850
1851 return true;
1852 }
1853
1854 int cg_slice_to_path(const char *unit, char **ret) {
1855 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1856 const char *dash;
1857 int r;
1858
1859 assert(unit);
1860 assert(ret);
1861
1862 if (streq(unit, SPECIAL_ROOT_SLICE)) {
1863 char *x;
1864
1865 x = strdup("");
1866 if (!x)
1867 return -ENOMEM;
1868 *ret = x;
1869 return 0;
1870 }
1871
1872 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1873 return -EINVAL;
1874
1875 if (!endswith(unit, ".slice"))
1876 return -EINVAL;
1877
1878 r = unit_name_to_prefix(unit, &p);
1879 if (r < 0)
1880 return r;
1881
1882 dash = strchr(p, '-');
1883
1884 /* Don't allow initial dashes */
1885 if (dash == p)
1886 return -EINVAL;
1887
1888 while (dash) {
1889 _cleanup_free_ char *escaped = NULL;
1890 char n[dash - p + sizeof(".slice")];
1891
1892 /* Don't allow trailing or double dashes */
1893 if (dash[1] == 0 || dash[1] == '-')
1894 return -EINVAL;
1895
1896 strcpy(stpncpy(n, p, dash - p), ".slice");
1897 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1898 return -EINVAL;
1899
1900 escaped = cg_escape(n);
1901 if (!escaped)
1902 return -ENOMEM;
1903
1904 if (!strextend(&s, escaped, "/", NULL))
1905 return -ENOMEM;
1906
1907 dash = strchr(dash+1, '-');
1908 }
1909
1910 e = cg_escape(unit);
1911 if (!e)
1912 return -ENOMEM;
1913
1914 if (!strextend(&s, e, NULL))
1915 return -ENOMEM;
1916
1917 *ret = s;
1918 s = NULL;
1919
1920 return 0;
1921 }
1922
1923 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1924 _cleanup_free_ char *p = NULL;
1925 int r;
1926
1927 r = cg_get_path(controller, path, attribute, &p);
1928 if (r < 0)
1929 return r;
1930
1931 return write_string_file(p, value, 0);
1932 }
1933
1934 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1935 _cleanup_free_ char *p = NULL;
1936 int r;
1937
1938 r = cg_get_path(controller, path, attribute, &p);
1939 if (r < 0)
1940 return r;
1941
1942 return read_one_line_file(p, ret);
1943 }
1944
1945 int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
1946 _cleanup_free_ char *filename = NULL, *content = NULL;
1947 char *line, *p;
1948 int i, r;
1949
1950 for (i = 0; keys[i]; i++)
1951 values[i] = NULL;
1952
1953 r = cg_get_path(controller, path, attribute, &filename);
1954 if (r < 0)
1955 return r;
1956
1957 r = read_full_file(filename, &content, NULL);
1958 if (r < 0)
1959 return r;
1960
1961 p = content;
1962 while ((line = strsep(&p, "\n"))) {
1963 char *key;
1964
1965 key = strsep(&line, " ");
1966
1967 for (i = 0; keys[i]; i++) {
1968 if (streq(key, keys[i])) {
1969 values[i] = strdup(line);
1970 break;
1971 }
1972 }
1973 }
1974
1975 for (i = 0; keys[i]; i++) {
1976 if (!values[i]) {
1977 for (i = 0; keys[i]; i++) {
1978 free(values[i]);
1979 values[i] = NULL;
1980 }
1981 return -ENOENT;
1982 }
1983 }
1984
1985 return 0;
1986 }
1987
1988 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1989 CGroupController c;
1990 int r, unified;
1991
1992 /* This one will create a cgroup in our private tree, but also
1993 * duplicate it in the trees specified in mask, and remove it
1994 * in all others */
1995
1996 /* First create the cgroup in our own hierarchy. */
1997 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1998 if (r < 0)
1999 return r;
2000
2001 /* If we are in the unified hierarchy, we are done now */
2002 unified = cg_all_unified();
2003 if (unified < 0)
2004 return unified;
2005 if (unified > 0)
2006 return 0;
2007
2008 /* Otherwise, do the same in the other hierarchies */
2009 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2010 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2011 const char *n;
2012
2013 n = cgroup_controller_to_string(c);
2014
2015 if (mask & bit)
2016 (void) cg_create(n, path);
2017 else if (supported & bit)
2018 (void) cg_trim(n, path, true);
2019 }
2020
2021 return 0;
2022 }
2023
2024 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2025 CGroupController c;
2026 int r, unified;
2027
2028 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
2029 if (r < 0)
2030 return r;
2031
2032 unified = cg_all_unified();
2033 if (unified < 0)
2034 return unified;
2035 if (unified > 0)
2036 return 0;
2037
2038 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2039 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2040 const char *p = NULL;
2041
2042 if (!(supported & bit))
2043 continue;
2044
2045 if (path_callback)
2046 p = path_callback(bit, userdata);
2047
2048 if (!p)
2049 p = path;
2050
2051 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2052 }
2053
2054 return 0;
2055 }
2056
2057 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2058 Iterator i;
2059 void *pidp;
2060 int r = 0;
2061
2062 SET_FOREACH(pidp, pids, i) {
2063 pid_t pid = PTR_TO_PID(pidp);
2064 int q;
2065
2066 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2067 if (q < 0 && r >= 0)
2068 r = q;
2069 }
2070
2071 return r;
2072 }
2073
2074 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2075 CGroupController c;
2076 int r = 0, unified;
2077
2078 if (!path_equal(from, to)) {
2079 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2080 if (r < 0)
2081 return r;
2082 }
2083
2084 unified = cg_all_unified();
2085 if (unified < 0)
2086 return unified;
2087 if (unified > 0)
2088 return r;
2089
2090 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2091 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2092 const char *p = NULL;
2093
2094 if (!(supported & bit))
2095 continue;
2096
2097 if (to_callback)
2098 p = to_callback(bit, userdata);
2099
2100 if (!p)
2101 p = to;
2102
2103 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2104 }
2105
2106 return 0;
2107 }
2108
2109 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2110 CGroupController c;
2111 int r, unified;
2112
2113 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2114 if (r < 0)
2115 return r;
2116
2117 unified = cg_all_unified();
2118 if (unified < 0)
2119 return unified;
2120 if (unified > 0)
2121 return r;
2122
2123 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2124 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2125
2126 if (!(supported & bit))
2127 continue;
2128
2129 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2130 }
2131
2132 return 0;
2133 }
2134
2135 int cg_mask_supported(CGroupMask *ret) {
2136 CGroupMask mask = 0;
2137 int r, unified;
2138
2139 /* Determines the mask of supported cgroup controllers. Only
2140 * includes controllers we can make sense of and that are
2141 * actually accessible. */
2142
2143 unified = cg_all_unified();
2144 if (unified < 0)
2145 return unified;
2146 if (unified > 0) {
2147 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2148 const char *c;
2149
2150 /* In the unified hierarchy we can read the supported
2151 * and accessible controllers from a the top-level
2152 * cgroup attribute */
2153
2154 r = cg_get_root_path(&root);
2155 if (r < 0)
2156 return r;
2157
2158 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2159 if (r < 0)
2160 return r;
2161
2162 r = read_one_line_file(path, &controllers);
2163 if (r < 0)
2164 return r;
2165
2166 c = controllers;
2167 for (;;) {
2168 _cleanup_free_ char *n = NULL;
2169 CGroupController v;
2170
2171 r = extract_first_word(&c, &n, NULL, 0);
2172 if (r < 0)
2173 return r;
2174 if (r == 0)
2175 break;
2176
2177 v = cgroup_controller_from_string(n);
2178 if (v < 0)
2179 continue;
2180
2181 mask |= CGROUP_CONTROLLER_TO_MASK(v);
2182 }
2183
2184 /* Currently, we support the cpu, memory, io and pids
2185 * controller in the unified hierarchy, mask
2186 * everything else off. */
2187 mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2188
2189 } else {
2190 CGroupController c;
2191
2192 /* In the legacy hierarchy, we check whether which
2193 * hierarchies are mounted. */
2194
2195 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2196 const char *n;
2197
2198 n = cgroup_controller_to_string(c);
2199 if (controller_is_accessible(n) >= 0)
2200 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2201 }
2202 }
2203
2204 *ret = mask;
2205 return 0;
2206 }
2207
2208 int cg_kernel_controllers(Set *controllers) {
2209 _cleanup_fclose_ FILE *f = NULL;
2210 char buf[LINE_MAX];
2211 int r;
2212
2213 assert(controllers);
2214
2215 /* Determines the full list of kernel-known controllers. Might
2216 * include controllers we don't actually support, arbitrary
2217 * named hierarchies and controllers that aren't currently
2218 * accessible (because not mounted). */
2219
2220 f = fopen("/proc/cgroups", "re");
2221 if (!f) {
2222 if (errno == ENOENT)
2223 return 0;
2224 return -errno;
2225 }
2226
2227 /* Ignore the header line */
2228 (void) fgets(buf, sizeof(buf), f);
2229
2230 for (;;) {
2231 char *controller;
2232 int enabled = 0;
2233
2234 errno = 0;
2235 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2236
2237 if (feof(f))
2238 break;
2239
2240 if (ferror(f) && errno > 0)
2241 return -errno;
2242
2243 return -EBADMSG;
2244 }
2245
2246 if (!enabled) {
2247 free(controller);
2248 continue;
2249 }
2250
2251 if (!cg_controller_is_valid(controller)) {
2252 free(controller);
2253 return -EBADMSG;
2254 }
2255
2256 r = set_consume(controllers, controller);
2257 if (r < 0)
2258 return r;
2259 }
2260
2261 return 0;
2262 }
2263
2264 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2265
2266 static int cg_update_unified(void) {
2267
2268 struct statfs fs;
2269
2270 /* Checks if we support the unified hierarchy. Returns an
2271 * error when the cgroup hierarchies aren't mounted yet or we
2272 * have any other trouble determining if the unified hierarchy
2273 * is supported. */
2274
2275 if (unified_cache >= CGROUP_UNIFIED_NONE)
2276 return 0;
2277
2278 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2279 return -errno;
2280
2281 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC))
2282 unified_cache = CGROUP_UNIFIED_ALL;
2283 else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2284 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
2285 return -errno;
2286
2287 unified_cache = F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC) ?
2288 CGROUP_UNIFIED_SYSTEMD : CGROUP_UNIFIED_NONE;
2289 } else
2290 return -ENOMEDIUM;
2291
2292 return 0;
2293 }
2294
2295 int cg_unified(const char *controller) {
2296
2297 int r;
2298
2299 r = cg_update_unified();
2300 if (r < 0)
2301 return r;
2302
2303 if (streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER))
2304 return unified_cache >= CGROUP_UNIFIED_SYSTEMD;
2305 else
2306 return unified_cache >= CGROUP_UNIFIED_ALL;
2307 }
2308
/* Shortcut for cg_unified(NULL): > 0 if all controllers are on the unified
 * hierarchy, 0 if not, negative on error. */
int cg_all_unified(void) {
        return cg_unified(NULL);
}
2313
2314 void cg_unified_flush(void) {
2315 unified_cache = CGROUP_UNIFIED_UNKNOWN;
2316 }
2317
2318 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2319 _cleanup_free_ char *fs = NULL;
2320 CGroupController c;
2321 int r, unified;
2322
2323 assert(p);
2324
2325 if (supported == 0)
2326 return 0;
2327
2328 unified = cg_all_unified();
2329 if (unified < 0)
2330 return unified;
2331 if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2332 return 0;
2333
2334 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2335 if (r < 0)
2336 return r;
2337
2338 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2339 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2340 const char *n;
2341
2342 if (!(supported & bit))
2343 continue;
2344
2345 n = cgroup_controller_to_string(c);
2346 {
2347 char s[1 + strlen(n) + 1];
2348
2349 s[0] = mask & bit ? '+' : '-';
2350 strcpy(s + 1, n);
2351
2352 r = write_string_file(fs, s, 0);
2353 if (r < 0)
2354 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2355 }
2356 }
2357
2358 return 0;
2359 }
2360
2361 bool cg_is_unified_wanted(void) {
2362 static thread_local int wanted = -1;
2363 int r, unified;
2364
2365 /* If the hierarchy is already mounted, then follow whatever
2366 * was chosen for it. */
2367 unified = cg_all_unified();
2368 if (unified >= 0)
2369 return unified;
2370
2371 /* Otherwise, let's see what the kernel command line has to
2372 * say. Since checking that is expensive, let's cache the
2373 * result. */
2374 if (wanted >= 0)
2375 return wanted;
2376
2377 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2378 if (r > 0)
2379 return (wanted = true);
2380 else {
2381 _cleanup_free_ char *value = NULL;
2382
2383 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2384 if (r < 0)
2385 return false;
2386 if (r == 0)
2387 return (wanted = false);
2388
2389 return (wanted = parse_boolean(value) > 0);
2390 }
2391 }
2392
/* The legacy hierarchy is wanted exactly when the unified one is not */
bool cg_is_legacy_wanted(void) {
        if (cg_is_unified_wanted())
                return false;

        return true;
}
2396
2397 bool cg_is_unified_systemd_controller_wanted(void) {
2398 static thread_local int wanted = -1;
2399 int r, unified;
2400
2401 /* If the unified hierarchy is requested in full, no need to
2402 * bother with this. */
2403 if (cg_is_unified_wanted())
2404 return 0;
2405
2406 /* If the hierarchy is already mounted, then follow whatever
2407 * was chosen for it. */
2408 unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
2409 if (unified >= 0)
2410 return unified;
2411
2412 /* Otherwise, let's see what the kernel command line has to
2413 * say. Since checking that is expensive, let's cache the
2414 * result. */
2415 if (wanted >= 0)
2416 return wanted;
2417
2418 r = get_proc_cmdline_key("systemd.legacy_systemd_cgroup_controller", NULL);
2419 if (r > 0)
2420 wanted = false;
2421 else {
2422 _cleanup_free_ char *value = NULL;
2423
2424 r = get_proc_cmdline_key("systemd.legacy_systemd_cgroup_controller=", &value);
2425 if (r < 0)
2426 return true;
2427
2428 if (r == 0)
2429 wanted = true;
2430 else
2431 wanted = parse_boolean(value) <= 0;
2432 }
2433
2434 return wanted;
2435 }
2436
/* The legacy systemd controller is wanted if the legacy hierarchy is wanted
 * and the unified systemd controller is not. */
bool cg_is_legacy_systemd_controller_wanted(void) {
        if (!cg_is_legacy_wanted())
                return false;

        return !cg_is_unified_systemd_controller_wanted();
}
2440
2441 int cg_weight_parse(const char *s, uint64_t *ret) {
2442 uint64_t u;
2443 int r;
2444
2445 if (isempty(s)) {
2446 *ret = CGROUP_WEIGHT_INVALID;
2447 return 0;
2448 }
2449
2450 r = safe_atou64(s, &u);
2451 if (r < 0)
2452 return r;
2453
2454 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2455 return -ERANGE;
2456
2457 *ret = u;
2458 return 0;
2459 }
2460
/* Default value for each IO limit type, indexed by the limit type constant:
 * every limit defaults to CGROUP_LIMIT_MAX. */
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]    = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WBPS_MAX]    = CGROUP_LIMIT_MAX,
        [CGROUP_IO_RIOPS_MAX]   = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WIOPS_MAX]   = CGROUP_LIMIT_MAX,
};
2467
/* String name of each IO limit type, indexed by the limit type constant. */
static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]    = "IOReadBandwidthMax",
        [CGROUP_IO_WBPS_MAX]    = "IOWriteBandwidthMax",
        [CGROUP_IO_RIOPS_MAX]   = "IOReadIOPSMax",
        [CGROUP_IO_WIOPS_MAX]   = "IOWriteIOPSMax",
};

/* NOTE(review): presumably generates cgroup_io_limit_type_to_string() and
 * cgroup_io_limit_type_from_string() from the table above — confirm against
 * the macro definition in string-table.h. */
DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2476
2477 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2478 uint64_t u;
2479 int r;
2480
2481 if (isempty(s)) {
2482 *ret = CGROUP_CPU_SHARES_INVALID;
2483 return 0;
2484 }
2485
2486 r = safe_atou64(s, &u);
2487 if (r < 0)
2488 return r;
2489
2490 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2491 return -ERANGE;
2492
2493 *ret = u;
2494 return 0;
2495 }
2496
2497 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2498 uint64_t u;
2499 int r;
2500
2501 if (isempty(s)) {
2502 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2503 return 0;
2504 }
2505
2506 r = safe_atou64(s, &u);
2507 if (r < 0)
2508 return r;
2509
2510 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2511 return -ERANGE;
2512
2513 *ret = u;
2514 return 0;
2515 }
2516
2517 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2518 [CGROUP_CONTROLLER_CPU] = "cpu",
2519 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2520 [CGROUP_CONTROLLER_IO] = "io",
2521 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2522 [CGROUP_CONTROLLER_MEMORY] = "memory",
2523 [CGROUP_CONTROLLER_DEVICES] = "devices",
2524 [CGROUP_CONTROLLER_PIDS] = "pids",
2525 };
2526
2527 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);