]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/cgroup-util.c
Merge pull request #1659 from vcaputo/journal_verify_envalid
[thirdparty/systemd.git] / src / basic / cgroup-util.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <unistd.h>
24 #include <signal.h>
25 #include <string.h>
26 #include <stdlib.h>
27 #include <dirent.h>
28 #include <sys/stat.h>
29 #include <sys/types.h>
30 #include <ftw.h>
31
32 #include "set.h"
33 #include "macro.h"
34 #include "util.h"
35 #include "formats-util.h"
36 #include "process-util.h"
37 #include "path-util.h"
38 #include "unit-name.h"
39 #include "fileio.h"
40 #include "special.h"
41 #include "mkdir.h"
42 #include "login-util.h"
43 #include "cgroup-util.h"
44
45 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
46 _cleanup_free_ char *fs = NULL;
47 FILE *f;
48 int r;
49
50 assert(_f);
51
52 r = cg_get_path(controller, path, "cgroup.procs", &fs);
53 if (r < 0)
54 return r;
55
56 f = fopen(fs, "re");
57 if (!f)
58 return -errno;
59
60 *_f = f;
61 return 0;
62 }
63
64 int cg_read_pid(FILE *f, pid_t *_pid) {
65 unsigned long ul;
66
67 /* Note that the cgroup.procs might contain duplicates! See
68 * cgroups.txt for details. */
69
70 assert(f);
71 assert(_pid);
72
73 errno = 0;
74 if (fscanf(f, "%lu", &ul) != 1) {
75
76 if (feof(f))
77 return 0;
78
79 return errno ? -errno : -EIO;
80 }
81
82 if (ul <= 0)
83 return -EIO;
84
85 *_pid = (pid_t) ul;
86 return 1;
87 }
88
89 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
90 _cleanup_free_ char *fs = NULL;
91 int r;
92 DIR *d;
93
94 assert(_d);
95
96 /* This is not recursive! */
97
98 r = cg_get_path(controller, path, NULL, &fs);
99 if (r < 0)
100 return r;
101
102 d = opendir(fs);
103 if (!d)
104 return -errno;
105
106 *_d = d;
107 return 0;
108 }
109
110 int cg_read_subgroup(DIR *d, char **fn) {
111 struct dirent *de;
112
113 assert(d);
114 assert(fn);
115
116 FOREACH_DIRENT_ALL(de, d, return -errno) {
117 char *b;
118
119 if (de->d_type != DT_DIR)
120 continue;
121
122 if (streq(de->d_name, ".") ||
123 streq(de->d_name, ".."))
124 continue;
125
126 b = strdup(de->d_name);
127 if (!b)
128 return -ENOMEM;
129
130 *fn = b;
131 return 1;
132 }
133
134 return 0;
135 }
136
137 int cg_rmdir(const char *controller, const char *path) {
138 _cleanup_free_ char *p = NULL;
139 int r;
140
141 r = cg_get_path(controller, path, NULL, &p);
142 if (r < 0)
143 return r;
144
145 r = rmdir(p);
146 if (r < 0 && errno != ENOENT)
147 return -errno;
148
149 return 0;
150 }
151
152 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
153 _cleanup_set_free_ Set *allocated_set = NULL;
154 bool done = false;
155 int r, ret = 0;
156 pid_t my_pid;
157
158 assert(sig >= 0);
159
160 /* This goes through the tasks list and kills them all. This
161 * is repeated until no further processes are added to the
162 * tasks list, to properly handle forking processes */
163
164 if (!s) {
165 s = allocated_set = set_new(NULL);
166 if (!s)
167 return -ENOMEM;
168 }
169
170 my_pid = getpid();
171
172 do {
173 _cleanup_fclose_ FILE *f = NULL;
174 pid_t pid = 0;
175 done = true;
176
177 r = cg_enumerate_processes(controller, path, &f);
178 if (r < 0) {
179 if (ret >= 0 && r != -ENOENT)
180 return r;
181
182 return ret;
183 }
184
185 while ((r = cg_read_pid(f, &pid)) > 0) {
186
187 if (ignore_self && pid == my_pid)
188 continue;
189
190 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
191 continue;
192
193 /* If we haven't killed this process yet, kill
194 * it */
195 if (kill(pid, sig) < 0) {
196 if (ret >= 0 && errno != ESRCH)
197 ret = -errno;
198 } else {
199 if (sigcont && sig != SIGKILL)
200 (void) kill(pid, SIGCONT);
201
202 if (ret == 0)
203 ret = 1;
204 }
205
206 done = false;
207
208 r = set_put(s, PID_TO_PTR(pid));
209 if (r < 0) {
210 if (ret >= 0)
211 return r;
212
213 return ret;
214 }
215 }
216
217 if (r < 0) {
218 if (ret >= 0)
219 return r;
220
221 return ret;
222 }
223
224 /* To avoid racing against processes which fork
225 * quicker than we can kill them we repeat this until
226 * no new pids need to be killed. */
227
228 } while (!done);
229
230 return ret;
231 }
232
233 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
234 _cleanup_set_free_ Set *allocated_set = NULL;
235 _cleanup_closedir_ DIR *d = NULL;
236 int r, ret;
237 char *fn;
238
239 assert(path);
240 assert(sig >= 0);
241
242 if (!s) {
243 s = allocated_set = set_new(NULL);
244 if (!s)
245 return -ENOMEM;
246 }
247
248 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
249
250 r = cg_enumerate_subgroups(controller, path, &d);
251 if (r < 0) {
252 if (ret >= 0 && r != -ENOENT)
253 return r;
254
255 return ret;
256 }
257
258 while ((r = cg_read_subgroup(d, &fn)) > 0) {
259 _cleanup_free_ char *p = NULL;
260
261 p = strjoin(path, "/", fn, NULL);
262 free(fn);
263 if (!p)
264 return -ENOMEM;
265
266 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
267 if (r != 0 && ret >= 0)
268 ret = r;
269 }
270
271 if (ret >= 0 && r < 0)
272 ret = r;
273
274 if (rem) {
275 r = cg_rmdir(controller, path);
276 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
277 return r;
278 }
279
280 return ret;
281 }
282
283 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
284 bool done = false;
285 _cleanup_set_free_ Set *s = NULL;
286 int r, ret = 0;
287 pid_t my_pid;
288
289 assert(cfrom);
290 assert(pfrom);
291 assert(cto);
292 assert(pto);
293
294 s = set_new(NULL);
295 if (!s)
296 return -ENOMEM;
297
298 my_pid = getpid();
299
300 do {
301 _cleanup_fclose_ FILE *f = NULL;
302 pid_t pid = 0;
303 done = true;
304
305 r = cg_enumerate_processes(cfrom, pfrom, &f);
306 if (r < 0) {
307 if (ret >= 0 && r != -ENOENT)
308 return r;
309
310 return ret;
311 }
312
313 while ((r = cg_read_pid(f, &pid)) > 0) {
314
315 /* This might do weird stuff if we aren't a
316 * single-threaded program. However, we
317 * luckily know we are not */
318 if (ignore_self && pid == my_pid)
319 continue;
320
321 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
322 continue;
323
324 /* Ignore kernel threads. Since they can only
325 * exist in the root cgroup, we only check for
326 * them there. */
327 if (cfrom &&
328 (isempty(pfrom) || path_equal(pfrom, "/")) &&
329 is_kernel_thread(pid) > 0)
330 continue;
331
332 r = cg_attach(cto, pto, pid);
333 if (r < 0) {
334 if (ret >= 0 && r != -ESRCH)
335 ret = r;
336 } else if (ret == 0)
337 ret = 1;
338
339 done = false;
340
341 r = set_put(s, PID_TO_PTR(pid));
342 if (r < 0) {
343 if (ret >= 0)
344 return r;
345
346 return ret;
347 }
348 }
349
350 if (r < 0) {
351 if (ret >= 0)
352 return r;
353
354 return ret;
355 }
356 } while (!done);
357
358 return ret;
359 }
360
361 int cg_migrate_recursive(
362 const char *cfrom,
363 const char *pfrom,
364 const char *cto,
365 const char *pto,
366 bool ignore_self,
367 bool rem) {
368
369 _cleanup_closedir_ DIR *d = NULL;
370 int r, ret = 0;
371 char *fn;
372
373 assert(cfrom);
374 assert(pfrom);
375 assert(cto);
376 assert(pto);
377
378 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
379
380 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
381 if (r < 0) {
382 if (ret >= 0 && r != -ENOENT)
383 return r;
384
385 return ret;
386 }
387
388 while ((r = cg_read_subgroup(d, &fn)) > 0) {
389 _cleanup_free_ char *p = NULL;
390
391 p = strjoin(pfrom, "/", fn, NULL);
392 free(fn);
393 if (!p)
394 return -ENOMEM;
395
396 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
397 if (r != 0 && ret >= 0)
398 ret = r;
399 }
400
401 if (r < 0 && ret >= 0)
402 ret = r;
403
404 if (rem) {
405 r = cg_rmdir(cfrom, pfrom);
406 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
407 return r;
408 }
409
410 return ret;
411 }
412
413 int cg_migrate_recursive_fallback(
414 const char *cfrom,
415 const char *pfrom,
416 const char *cto,
417 const char *pto,
418 bool ignore_self,
419 bool rem) {
420
421 int r;
422
423 assert(cfrom);
424 assert(pfrom);
425 assert(cto);
426 assert(pto);
427
428 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
429 if (r < 0) {
430 char prefix[strlen(pto) + 1];
431
432 /* This didn't work? Then let's try all prefixes of the destination */
433
434 PATH_FOREACH_PREFIX(prefix, pto) {
435 int q;
436
437 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
438 if (q >= 0)
439 return q;
440 }
441 }
442
443 return r;
444 }
445
/* Converts a controller name to the directory name below
 * /sys/fs/cgroup/ we want to mount it to. Effectively, this just cuts
 * off the "name=" prefix used for named hierarchies, if it is
 * specified. The returned pointer points into 'controller'. */
static const char *controller_to_dirname(const char *controller) {
        const char *stripped;

        assert(controller);

        stripped = startswith(controller, "name=");

        return stripped ? stripped : controller;
}
462
/* Builds the file system path for a cgroup on the legacy hierarchy:
 * /sys/fs/cgroup/<controller dir>[/<path>][/<suffix>]. Empty or NULL
 * 'path' and 'suffix' components are left out.
 *
 * Returns 0 and stores the newly allocated string in *fs, or -ENOMEM. */
static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
        const char *dn;
        char *joined = NULL;
        bool no_path, no_suffix;

        assert(fs);
        assert(controller);

        dn = controller_to_dirname(controller);

        no_path = isempty(path);
        no_suffix = isempty(suffix);

        if (no_path)
                joined = no_suffix ?
                        strappend("/sys/fs/cgroup/", dn) :
                        strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
        else
                joined = no_suffix ?
                        strjoin("/sys/fs/cgroup/", dn, "/", path, NULL) :
                        strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
486
/* Builds the file system path for a cgroup on the unified hierarchy:
 * /sys/fs/cgroup[/<path>][/<suffix>]. Empty or NULL 'path' and
 * 'suffix' components are left out.
 *
 * Returns 0 and stores the newly allocated string in *fs, or -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        char *joined;
        bool no_path, no_suffix;

        assert(fs);

        no_path = isempty(path);
        no_suffix = isempty(suffix);

        if (no_path)
                joined = no_suffix ?
                        strdup("/sys/fs/cgroup") :
                        strappend("/sys/fs/cgroup/", suffix);
        else
                joined = no_suffix ?
                        strappend("/sys/fs/cgroup/", path) :
                        strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
506
/* Computes the path of a cgroup (or of the attribute file 'suffix'
 * inside it) and stores a newly allocated string in *fs.
 *
 * Without a controller the result is just 'path' and 'suffix' joined,
 * i.e. the path *below* the controllers, without any prefix; at least
 * one of the two must be given then. With a controller the result is
 * the full path below /sys/fs/cgroup, laid out for the unified or the
 * legacy hierarchy depending on what cg_unified() reports.
 *
 * Returns 0 on success, -EINVAL on an invalid controller name or when
 * neither controller, path nor suffix is given, -ENOMEM on allocation
 * failure, or the error returned by cg_unified(). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int unified, r;

        assert(fs);

        if (!controller) {
                char *joined;

                if (path && suffix)
                        joined = strjoin(path, "/", suffix, NULL);
                else if (path)
                        joined = strdup(path);
                else if (suffix)
                        joined = strdup(suffix);
                else
                        return -EINVAL;

                if (!joined)
                        return -ENOMEM;

                *fs = path_kill_slashes(joined);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        unified = cg_unified();
        if (unified < 0)
                return unified;

        r = unified > 0 ?
                join_path_unified(path, suffix, fs) :
                join_path_legacy(controller, path, suffix, fs);
        if (r < 0)
                return r;

        path_kill_slashes(*fs);
        return 0;
}
551
552 static int controller_is_accessible(const char *controller) {
553 int unified;
554
555 assert(controller);
556
557 /* Checks whether a specific controller is accessible,
558 * i.e. its hierarchy mounted. In the unified hierarchy all
559 * controllers are considered accessible, except for the named
560 * hierarchies */
561
562 if (!cg_controller_is_valid(controller))
563 return -EINVAL;
564
565 unified = cg_unified();
566 if (unified < 0)
567 return unified;
568 if (unified > 0) {
569 /* We don't support named hierarchies if we are using
570 * the unified hierarchy. */
571
572 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
573 return 0;
574
575 if (startswith(controller, "name="))
576 return -EOPNOTSUPP;
577
578 } else {
579 const char *cc, *dn;
580
581 dn = controller_to_dirname(controller);
582 cc = strjoina("/sys/fs/cgroup/", dn);
583
584 if (laccess(cc, F_OK) < 0)
585 return -errno;
586 }
587
588 return 0;
589 }
590
/* Same as cg_get_path(), but first verifies that the specified
 * controller is actually accessible, and fails with that check's
 * error if it is not. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
604
605 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
606 assert(path);
607 assert(sb);
608 assert(ftwbuf);
609
610 if (typeflag != FTW_DP)
611 return 0;
612
613 if (ftwbuf->level < 1)
614 return 0;
615
616 (void) rmdir(path);
617 return 0;
618 }
619
620 int cg_trim(const char *controller, const char *path, bool delete_root) {
621 _cleanup_free_ char *fs = NULL;
622 int r = 0;
623
624 assert(path);
625
626 r = cg_get_path(controller, path, NULL, &fs);
627 if (r < 0)
628 return r;
629
630 errno = 0;
631 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
632 if (errno == ENOENT)
633 r = 0;
634 else if (errno != 0)
635 r = -errno;
636 else
637 r = -EIO;
638 }
639
640 if (delete_root) {
641 if (rmdir(fs) < 0 && errno != ENOENT)
642 return -errno;
643 }
644
645 return r;
646 }
647
648 int cg_create(const char *controller, const char *path) {
649 _cleanup_free_ char *fs = NULL;
650 int r;
651
652 r = cg_get_path_and_check(controller, path, NULL, &fs);
653 if (r < 0)
654 return r;
655
656 r = mkdir_parents(fs, 0755);
657 if (r < 0)
658 return r;
659
660 if (mkdir(fs, 0755) < 0) {
661
662 if (errno == EEXIST)
663 return 0;
664
665 return -errno;
666 }
667
668 return 1;
669 }
670
/* Creates the specified cgroup and attaches process 'pid' to it.
 *
 * On success returns the result of cg_create(). On failure returns a
 * negative errno-style value. Note that the cgroup is not removed
 * again if attaching fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);
        if (attached < 0)
                return attached;

        return created;
}
687
688 int cg_attach(const char *controller, const char *path, pid_t pid) {
689 _cleanup_free_ char *fs = NULL;
690 char c[DECIMAL_STR_MAX(pid_t) + 2];
691 int r;
692
693 assert(path);
694 assert(pid >= 0);
695
696 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
697 if (r < 0)
698 return r;
699
700 if (pid == 0)
701 pid = getpid();
702
703 snprintf(c, sizeof(c), PID_FMT"\n", pid);
704
705 return write_string_file(fs, c, 0);
706 }
707
708 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
709 int r;
710
711 assert(controller);
712 assert(path);
713 assert(pid >= 0);
714
715 r = cg_attach(controller, path, pid);
716 if (r < 0) {
717 char prefix[strlen(path) + 1];
718
719 /* This didn't work? Then let's try all prefixes of
720 * the destination */
721
722 PATH_FOREACH_PREFIX(prefix, path) {
723 int q;
724
725 q = cg_attach(controller, prefix, pid);
726 if (q >= 0)
727 return q;
728 }
729 }
730
731 return r;
732 }
733
734 int cg_set_group_access(
735 const char *controller,
736 const char *path,
737 mode_t mode,
738 uid_t uid,
739 gid_t gid) {
740
741 _cleanup_free_ char *fs = NULL;
742 int r;
743
744 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
745 return 0;
746
747 if (mode != MODE_INVALID)
748 mode &= 0777;
749
750 r = cg_get_path(controller, path, NULL, &fs);
751 if (r < 0)
752 return r;
753
754 return chmod_and_chown(fs, mode, uid, gid);
755 }
756
757 int cg_set_task_access(
758 const char *controller,
759 const char *path,
760 mode_t mode,
761 uid_t uid,
762 gid_t gid) {
763
764 _cleanup_free_ char *fs = NULL, *procs = NULL;
765 int r, unified;
766
767 assert(path);
768
769 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
770 return 0;
771
772 if (mode != MODE_INVALID)
773 mode &= 0666;
774
775 r = cg_get_path(controller, path, "cgroup.procs", &fs);
776 if (r < 0)
777 return r;
778
779 r = chmod_and_chown(fs, mode, uid, gid);
780 if (r < 0)
781 return r;
782
783 unified = cg_unified();
784 if (unified < 0)
785 return unified;
786 if (unified)
787 return 0;
788
789 /* Compatibility, Always keep values for "tasks" in sync with
790 * "cgroup.procs" */
791 if (cg_get_path(controller, path, "tasks", &procs) >= 0)
792 (void) chmod_and_chown(procs, mode, uid, gid);
793
794 return 0;
795 }
796
797 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
798 _cleanup_fclose_ FILE *f = NULL;
799 char line[LINE_MAX];
800 const char *fs;
801 size_t cs = 0;
802 int unified;
803
804 assert(path);
805 assert(pid >= 0);
806
807 unified = cg_unified();
808 if (unified < 0)
809 return unified;
810 if (unified == 0) {
811 if (controller) {
812 if (!cg_controller_is_valid(controller))
813 return -EINVAL;
814 } else
815 controller = SYSTEMD_CGROUP_CONTROLLER;
816
817 cs = strlen(controller);
818 }
819
820 fs = procfs_file_alloca(pid, "cgroup");
821 f = fopen(fs, "re");
822 if (!f)
823 return errno == ENOENT ? -ESRCH : -errno;
824
825 FOREACH_LINE(line, f, return -errno) {
826 char *e, *p;
827
828 truncate_nl(line);
829
830 if (unified) {
831 e = startswith(line, "0:");
832 if (!e)
833 continue;
834
835 e = strchr(e, ':');
836 if (!e)
837 continue;
838 } else {
839 char *l;
840 size_t k;
841 const char *word, *state;
842 bool found = false;
843
844 l = strchr(line, ':');
845 if (!l)
846 continue;
847
848 l++;
849 e = strchr(l, ':');
850 if (!e)
851 continue;
852
853 *e = 0;
854 FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
855 if (k == cs && memcmp(word, controller, cs) == 0) {
856 found = true;
857 break;
858 }
859 }
860
861 if (!found)
862 continue;
863 }
864
865 p = strdup(e + 1);
866 if (!p)
867 return -ENOMEM;
868
869 *path = p;
870 return 0;
871 }
872
873 return -ENODATA;
874 }
875
876 int cg_install_release_agent(const char *controller, const char *agent) {
877 _cleanup_free_ char *fs = NULL, *contents = NULL;
878 const char *sc;
879 int r, unified;
880
881 assert(agent);
882
883 unified = cg_unified();
884 if (unified < 0)
885 return unified;
886 if (unified) /* doesn't apply to unified hierarchy */
887 return -EOPNOTSUPP;
888
889 r = cg_get_path(controller, NULL, "release_agent", &fs);
890 if (r < 0)
891 return r;
892
893 r = read_one_line_file(fs, &contents);
894 if (r < 0)
895 return r;
896
897 sc = strstrip(contents);
898 if (isempty(sc)) {
899 r = write_string_file(fs, agent, 0);
900 if (r < 0)
901 return r;
902 } else if (!path_equal(sc, agent))
903 return -EEXIST;
904
905 fs = mfree(fs);
906 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
907 if (r < 0)
908 return r;
909
910 contents = mfree(contents);
911 r = read_one_line_file(fs, &contents);
912 if (r < 0)
913 return r;
914
915 sc = strstrip(contents);
916 if (streq(sc, "0")) {
917 r = write_string_file(fs, "1", 0);
918 if (r < 0)
919 return r;
920
921 return 1;
922 }
923
924 if (!streq(sc, "1"))
925 return -EIO;
926
927 return 0;
928 }
929
930 int cg_uninstall_release_agent(const char *controller) {
931 _cleanup_free_ char *fs = NULL;
932 int r, unified;
933
934 unified = cg_unified();
935 if (unified < 0)
936 return unified;
937 if (unified) /* Doesn't apply to unified hierarchy */
938 return -EOPNOTSUPP;
939
940 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
941 if (r < 0)
942 return r;
943
944 r = write_string_file(fs, "0", 0);
945 if (r < 0)
946 return r;
947
948 fs = mfree(fs);
949
950 r = cg_get_path(controller, NULL, "release_agent", &fs);
951 if (r < 0)
952 return r;
953
954 r = write_string_file(fs, "", 0);
955 if (r < 0)
956 return r;
957
958 return 0;
959 }
960
961 int cg_is_empty(const char *controller, const char *path) {
962 _cleanup_fclose_ FILE *f = NULL;
963 pid_t pid;
964 int r;
965
966 assert(path);
967
968 r = cg_enumerate_processes(controller, path, &f);
969 if (r == -ENOENT)
970 return 1;
971 if (r < 0)
972 return r;
973
974 r = cg_read_pid(f, &pid);
975 if (r < 0)
976 return r;
977
978 return r == 0;
979 }
980
981 int cg_is_empty_recursive(const char *controller, const char *path) {
982 int unified, r;
983
984 assert(path);
985
986 /* The root cgroup is always populated */
987 if (controller && (isempty(path) || path_equal(path, "/")))
988 return false;
989
990 unified = cg_unified();
991 if (unified < 0)
992 return unified;
993
994 if (unified > 0) {
995 _cleanup_free_ char *populated = NULL, *t = NULL;
996
997 /* On the unified hierarchy we can check empty state
998 * via the "cgroup.populated" attribute. */
999
1000 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1001 if (r < 0)
1002 return r;
1003
1004 r = read_one_line_file(populated, &t);
1005 if (r == -ENOENT)
1006 return 1;
1007 if (r < 0)
1008 return r;
1009
1010 return streq(t, "0");
1011 } else {
1012 _cleanup_closedir_ DIR *d = NULL;
1013 char *fn;
1014
1015 r = cg_is_empty(controller, path);
1016 if (r <= 0)
1017 return r;
1018
1019 r = cg_enumerate_subgroups(controller, path, &d);
1020 if (r == -ENOENT)
1021 return 1;
1022 if (r < 0)
1023 return r;
1024
1025 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1026 _cleanup_free_ char *p = NULL;
1027
1028 p = strjoin(path, "/", fn, NULL);
1029 free(fn);
1030 if (!p)
1031 return -ENOMEM;
1032
1033 r = cg_is_empty_recursive(controller, p);
1034 if (r <= 0)
1035 return r;
1036 }
1037 if (r < 0)
1038 return r;
1039
1040 return true;
1041 }
1042 }
1043
/* Splits a cgroup specification into controller and path. Three forms
 * are understood:
 *
 *   "/some/path"       -> controller NULL, path "/some/path"
 *   "controller"       -> controller as given, path NULL
 *   "controller:/path" -> both (path NULL if nothing follows the colon)
 *
 * 'controller' and 'path' may each be NULL if the caller is not
 * interested in that part. Returned strings are newly allocated.
 *
 * Returns 0 on success, -EINVAL if the controller name is invalid or
 * the path is unsafe or not absolute, and -ENOMEM on allocation
 * failure. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *ctrl = NULL, *cgpath = NULL;
        const char *colon;

        assert(spec);

        /* Form 1: nothing but an absolute path */
        if (spec[0] == '/') {
                if (!path_is_safe(spec))
                        return -EINVAL;

                if (path) {
                        cgpath = strdup(spec);
                        if (!cgpath)
                                return -ENOMEM;

                        *path = path_kill_slashes(cgpath);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        colon = strchr(spec, ':');

        /* Form 2: nothing but a controller name */
        if (!colon) {
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        ctrl = strdup(spec);
                        if (!ctrl)
                                return -ENOMEM;

                        *controller = ctrl;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Form 3: controller and path, separated by a colon */
        ctrl = strndup(spec, colon - spec);
        if (!ctrl)
                return -ENOMEM;
        if (!cg_controller_is_valid(ctrl)) {
                free(ctrl);
                return -EINVAL;
        }

        if (!isempty(colon + 1)) {
                cgpath = strdup(colon + 1);
                if (!cgpath) {
                        free(ctrl);
                        return -ENOMEM;
                }

                if (!path_is_safe(cgpath) ||
                    !path_is_absolute(cgpath)) {
                        free(ctrl);
                        free(cgpath);
                        return -EINVAL;
                }

                path_kill_slashes(cgpath);
        }

        /* Hand over whatever the caller asked for, drop the rest */
        if (controller)
                *controller = ctrl;
        else
                free(ctrl);

        if (path)
                *path = cgpath;
        else
                free(cgpath);

        return 0;
}
1126
1127 int cg_mangle_path(const char *path, char **result) {
1128 _cleanup_free_ char *c = NULL, *p = NULL;
1129 char *t;
1130 int r;
1131
1132 assert(path);
1133 assert(result);
1134
1135 /* First, check if it already is a filesystem path */
1136 if (path_startswith(path, "/sys/fs/cgroup")) {
1137
1138 t = strdup(path);
1139 if (!t)
1140 return -ENOMEM;
1141
1142 *result = path_kill_slashes(t);
1143 return 0;
1144 }
1145
1146 /* Otherwise, treat it as cg spec */
1147 r = cg_split_spec(path, &c, &p);
1148 if (r < 0)
1149 return r;
1150
1151 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1152 }
1153
1154 int cg_get_root_path(char **path) {
1155 char *p, *e;
1156 int r;
1157
1158 assert(path);
1159
1160 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1161 if (r < 0)
1162 return r;
1163
1164 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1165 if (!e)
1166 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1167 if (!e)
1168 e = endswith(p, "/system"); /* even more legacy */
1169 if (e)
1170 *e = 0;
1171
1172 *path = p;
1173 return 0;
1174 }
1175
1176 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1177 _cleanup_free_ char *rt = NULL;
1178 char *p;
1179 int r;
1180
1181 assert(cgroup);
1182 assert(shifted);
1183
1184 if (!root) {
1185 /* If the root was specified let's use that, otherwise
1186 * let's determine it from PID 1 */
1187
1188 r = cg_get_root_path(&rt);
1189 if (r < 0)
1190 return r;
1191
1192 root = rt;
1193 }
1194
1195 p = path_startswith(cgroup, root);
1196 if (p && p > cgroup)
1197 *shifted = p - 1;
1198 else
1199 *shifted = cgroup;
1200
1201 return 0;
1202 }
1203
1204 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1205 _cleanup_free_ char *raw = NULL;
1206 const char *c;
1207 int r;
1208
1209 assert(pid >= 0);
1210 assert(cgroup);
1211
1212 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1213 if (r < 0)
1214 return r;
1215
1216 r = cg_shift_path(raw, root, &c);
1217 if (r < 0)
1218 return r;
1219
1220 if (c == raw) {
1221 *cgroup = raw;
1222 raw = NULL;
1223 } else {
1224 char *n;
1225
1226 n = strdup(c);
1227 if (!n)
1228 return -ENOMEM;
1229
1230 *cgroup = n;
1231 }
1232
1233 return 0;
1234 }
1235
1236 int cg_path_decode_unit(const char *cgroup, char **unit){
1237 char *c, *s;
1238 size_t n;
1239
1240 assert(cgroup);
1241 assert(unit);
1242
1243 n = strcspn(cgroup, "/");
1244 if (n < 3)
1245 return -ENXIO;
1246
1247 c = strndupa(cgroup, n);
1248 c = cg_unescape(c);
1249
1250 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1251 return -ENXIO;
1252
1253 s = strdup(c);
1254 if (!s)
1255 return -ENOMEM;
1256
1257 *unit = s;
1258 return 0;
1259 }
1260
1261 static bool valid_slice_name(const char *p, size_t n) {
1262
1263 if (!p)
1264 return false;
1265
1266 if (n < strlen("x.slice"))
1267 return false;
1268
1269 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1270 char buf[n+1], *c;
1271
1272 memcpy(buf, p, n);
1273 buf[n] = 0;
1274
1275 c = cg_unescape(buf);
1276
1277 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1278 }
1279
1280 return false;
1281 }
1282
/* Skips over all slice assignments, i.e. all leading path components
 * accepted by valid_slice_name(), together with the slashes in front
 * of them. Returns a pointer to the first component that is not a
 * slice (with leading slashes already skipped). */
static const char *skip_slices(const char *p) {
        size_t len;

        assert(p);

        p += strspn(p, "/");

        for (len = strcspn(p, "/"); valid_slice_name(p, len); len = strcspn(p, "/")) {
                p += len;
                p += strspn(p, "/");
        }

        return p;
}
1300
/* Determines the unit a cgroup path belongs to: skips all leading
 * slice components and decodes the component that follows.
 *
 * Returns 0 and stores a newly allocated unit name in *ret, -ENXIO if
 * no valid non-slice unit name is found, or another negative
 * errno-style value from cg_path_decode_unit(). */
int cg_path_get_unit(const char *path, char **ret) {
        const char *after_slices;
        char *name;
        int r;

        assert(path);
        assert(ret);

        after_slices = skip_slices(path);

        r = cg_path_decode_unit(after_slices, &name);
        if (r < 0)
                return r;

        /* We skipped over the slices, don't accept any now */
        if (endswith(name, ".slice")) {
                free(name);
                return -ENXIO;
        }

        *ret = name;
        return 0;
}
1324
1325 int cg_pid_get_unit(pid_t pid, char **unit) {
1326 _cleanup_free_ char *cgroup = NULL;
1327 int r;
1328
1329 assert(unit);
1330
1331 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1332 if (r < 0)
1333 return r;
1334
1335 return cg_path_get_unit(cgroup, unit);
1336 }
1337
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to whatever follows the "session-<id>.scope"
 * component (with the slashes after it skipped as well), or NULL if
 * 'p' is empty or its first component is not a session scope with a
 * valid session id.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the id between prefix and suffix, plus NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1374
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to whatever follows the "user@<uid>.service"
 * component (with the slashes after it skipped as well), or NULL if
 * 'p' is empty or its first component is not of that form with a
 * parsable UID.
 */
static const char *skip_user_manager(const char *p) {
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;

        /* Room for the UID between prefix and suffix, plus NUL */
        char uid_str[len - 5 - 8 + 1];

        memcpy(uid_str, p + 5, len - 5 - 8);
        uid_str[len - 5 - 8] = 0;

        /* Note that user manager services never need unescaping,
         * since they cannot conflict with the kernel's own
         * names, hence we don't need to call cg_unescape()
         * here. */

        if (parse_uid(uid_str, NULL) < 0)
                return NULL;

        p += len;
        p += strspn(p, "/");

        return p;
}
1412
/* Skips everything in a cgroup path that precedes the units of a user:
 * first all slices, then either the user manager service or, failing
 * that, the session scope. Returns NULL if neither of the two follows
 * the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Skip the user manager, if it's in the path now... */
        after_manager = skip_user_manager(after_slices);

        /* Alternatively skip the user session if it is in the path... */
        return after_manager ? after_manager : skip_session(after_slices);
}
1429
/* Determines the user unit a cgroup path belongs to. The part of the
 * path that identifies the user manager or session is skipped first;
 * if it is missing -ENXIO is returned. What remains is parsed exactly
 * like a system unit path by cg_path_get_unit(). */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);
        if (rest == NULL)
                return -ENXIO;

        return cg_path_get_unit(rest, ret);
}
1445
1446 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1447 _cleanup_free_ char *cgroup = NULL;
1448 int r;
1449
1450 assert(unit);
1451
1452 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1453 if (r < 0)
1454 return r;
1455
1456 return cg_path_get_user_unit(cgroup, unit);
1457 }
1458
1459 int cg_path_get_machine_name(const char *path, char **machine) {
1460 _cleanup_free_ char *u = NULL;
1461 const char *sl;
1462 int r;
1463
1464 r = cg_path_get_unit(path, &u);
1465 if (r < 0)
1466 return r;
1467
1468 sl = strjoina("/run/systemd/machines/unit:", u);
1469 return readlink_malloc(sl, machine);
1470 }
1471
1472 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1473 _cleanup_free_ char *cgroup = NULL;
1474 int r;
1475
1476 assert(machine);
1477
1478 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1479 if (r < 0)
1480 return r;
1481
1482 return cg_path_get_machine_name(cgroup, machine);
1483 }
1484
1485 int cg_path_get_session(const char *path, char **session) {
1486 _cleanup_free_ char *unit = NULL;
1487 char *start, *end;
1488 int r;
1489
1490 assert(path);
1491
1492 r = cg_path_get_unit(path, &unit);
1493 if (r < 0)
1494 return r;
1495
1496 start = startswith(unit, "session-");
1497 if (!start)
1498 return -ENXIO;
1499 end = endswith(start, ".scope");
1500 if (!end)
1501 return -ENXIO;
1502
1503 *end = 0;
1504 if (!session_id_valid(start))
1505 return -ENXIO;
1506
1507 if (session) {
1508 char *rr;
1509
1510 rr = strdup(start);
1511 if (!rr)
1512 return -ENOMEM;
1513
1514 *session = rr;
1515 }
1516
1517 return 0;
1518 }
1519
1520 int cg_pid_get_session(pid_t pid, char **session) {
1521 _cleanup_free_ char *cgroup = NULL;
1522 int r;
1523
1524 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1525 if (r < 0)
1526 return r;
1527
1528 return cg_path_get_session(cgroup, session);
1529 }
1530
1531 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1532 _cleanup_free_ char *slice = NULL;
1533 char *start, *end;
1534 int r;
1535
1536 assert(path);
1537
1538 r = cg_path_get_slice(path, &slice);
1539 if (r < 0)
1540 return r;
1541
1542 start = startswith(slice, "user-");
1543 if (!start)
1544 return -ENXIO;
1545 end = endswith(start, ".slice");
1546 if (!end)
1547 return -ENXIO;
1548
1549 *end = 0;
1550 if (parse_uid(start, uid) < 0)
1551 return -ENXIO;
1552
1553 return 0;
1554 }
1555
1556 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1557 _cleanup_free_ char *cgroup = NULL;
1558 int r;
1559
1560 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1561 if (r < 0)
1562 return r;
1563
1564 return cg_path_get_owner_uid(cgroup, uid);
1565 }
1566
/* Finds the right-most slice unit counting from the beginning of the path,
 * stopping at the first path component that is not a valid slice name. If no
 * leading component is a slice at all, "-.slice" is returned instead. The
 * result is stored in *slice. */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last = NULL;
        size_t len;
        char *root;

        assert(p);
        assert(slice);

        /* Position on the first component (skipping any leading slashes)
         * and measure it. */
        p += strspn(p, "/");
        len = strcspn(p, "/");

        while (valid_slice_name(p, len)) {
                /* Remember this slice, then advance to the next component. */
                last = p;

                p += len;
                p += strspn(p, "/");
                len = strcspn(p, "/");
        }

        if (last)
                return cg_path_decode_unit(last, slice);

        /* Not a single slice component was seen. */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1602
1603 int cg_pid_get_slice(pid_t pid, char **slice) {
1604 _cleanup_free_ char *cgroup = NULL;
1605 int r;
1606
1607 assert(slice);
1608
1609 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1610 if (r < 0)
1611 return r;
1612
1613 return cg_path_get_slice(cgroup, slice);
1614 }
1615
/* Like cg_path_get_slice(), but operates on the part of the path that follows
 * the user prefix. Returns -ENXIO if skip_user_prefix() finds no such
 * prefix. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Past the prefix the layout is parsed exactly like a system slice,
         * so the regular parser is reused. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1629
1630 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1631 _cleanup_free_ char *cgroup = NULL;
1632 int r;
1633
1634 assert(slice);
1635
1636 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1637 if (r < 0)
1638 return r;
1639
1640 return cg_path_get_user_slice(cgroup, slice);
1641 }
1642
1643 char *cg_escape(const char *p) {
1644 bool need_prefix = false;
1645
1646 /* This implements very minimal escaping for names to be used
1647 * as file names in the cgroup tree: any name which might
1648 * conflict with a kernel name or is prefixed with '_' is
1649 * prefixed with a '_'. That way, when reading cgroup names it
1650 * is sufficient to remove a single prefixing underscore if
1651 * there is one. */
1652
1653 /* The return value of this function (unlike cg_unescape())
1654 * needs free()! */
1655
1656 if (p[0] == 0 ||
1657 p[0] == '_' ||
1658 p[0] == '.' ||
1659 streq(p, "notify_on_release") ||
1660 streq(p, "release_agent") ||
1661 streq(p, "tasks") ||
1662 startswith(p, "cgroup."))
1663 need_prefix = true;
1664 else {
1665 const char *dot;
1666
1667 dot = strrchr(p, '.');
1668 if (dot) {
1669 CGroupController c;
1670 size_t l = dot - p;
1671
1672 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1673 const char *n;
1674
1675 n = cgroup_controller_to_string(c);
1676
1677 if (l != strlen(n))
1678 continue;
1679
1680 if (memcmp(p, n, l) != 0)
1681 continue;
1682
1683 need_prefix = true;
1684 break;
1685 }
1686 }
1687 }
1688
1689 if (need_prefix)
1690 return strappend("_", p);
1691
1692 return strdup(p);
1693 }
1694
/* Reverses cg_escape() by skipping one leading underscore, if there is one.
 *
 * Unlike cg_escape(), the result points into the string passed in and must
 * not be free()d. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1706
1707 #define CONTROLLER_VALID \
1708 DIGITS LETTERS \
1709 "_"
1710
1711 bool cg_controller_is_valid(const char *p) {
1712 const char *t, *s;
1713
1714 if (!p)
1715 return false;
1716
1717 s = startswith(p, "name=");
1718 if (s)
1719 p = s;
1720
1721 if (*p == 0 || *p == '_')
1722 return false;
1723
1724 for (t = p; *t; t++)
1725 if (!strchr(CONTROLLER_VALID, *t))
1726 return false;
1727
1728 if (t - p > FILENAME_MAX)
1729 return false;
1730
1731 return true;
1732 }
1733
1734 int cg_slice_to_path(const char *unit, char **ret) {
1735 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1736 const char *dash;
1737 int r;
1738
1739 assert(unit);
1740 assert(ret);
1741
1742 if (streq(unit, "-.slice")) {
1743 char *x;
1744
1745 x = strdup("");
1746 if (!x)
1747 return -ENOMEM;
1748 *ret = x;
1749 return 0;
1750 }
1751
1752 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1753 return -EINVAL;
1754
1755 if (!endswith(unit, ".slice"))
1756 return -EINVAL;
1757
1758 r = unit_name_to_prefix(unit, &p);
1759 if (r < 0)
1760 return r;
1761
1762 dash = strchr(p, '-');
1763
1764 /* Don't allow initial dashes */
1765 if (dash == p)
1766 return -EINVAL;
1767
1768 while (dash) {
1769 _cleanup_free_ char *escaped = NULL;
1770 char n[dash - p + sizeof(".slice")];
1771
1772 /* Don't allow trailing or double dashes */
1773 if (dash[1] == 0 || dash[1] == '-')
1774 return -EINVAL;
1775
1776 strcpy(stpncpy(n, p, dash - p), ".slice");
1777 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1778 return -EINVAL;
1779
1780 escaped = cg_escape(n);
1781 if (!escaped)
1782 return -ENOMEM;
1783
1784 if (!strextend(&s, escaped, "/", NULL))
1785 return -ENOMEM;
1786
1787 dash = strchr(dash+1, '-');
1788 }
1789
1790 e = cg_escape(unit);
1791 if (!e)
1792 return -ENOMEM;
1793
1794 if (!strextend(&s, e, NULL))
1795 return -ENOMEM;
1796
1797 *ret = s;
1798 s = NULL;
1799
1800 return 0;
1801 }
1802
1803 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1804 _cleanup_free_ char *p = NULL;
1805 int r;
1806
1807 r = cg_get_path(controller, path, attribute, &p);
1808 if (r < 0)
1809 return r;
1810
1811 return write_string_file(p, value, 0);
1812 }
1813
1814 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1815 _cleanup_free_ char *p = NULL;
1816 int r;
1817
1818 r = cg_get_path(controller, path, attribute, &p);
1819 if (r < 0)
1820 return r;
1821
1822 return read_one_line_file(p, ret);
1823 }
1824
1825 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1826 CGroupController c;
1827 int r, unified;
1828
1829 /* This one will create a cgroup in our private tree, but also
1830 * duplicate it in the trees specified in mask, and remove it
1831 * in all others */
1832
1833 /* First create the cgroup in our own hierarchy. */
1834 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1835 if (r < 0)
1836 return r;
1837
1838 /* If we are in the unified hierarchy, we are done now */
1839 unified = cg_unified();
1840 if (unified < 0)
1841 return unified;
1842 if (unified > 0)
1843 return 0;
1844
1845 /* Otherwise, do the same in the other hierarchies */
1846 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1847 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1848 const char *n;
1849
1850 n = cgroup_controller_to_string(c);
1851
1852 if (mask & bit)
1853 (void) cg_create(n, path);
1854 else if (supported & bit)
1855 (void) cg_trim(n, path, true);
1856 }
1857
1858 return 0;
1859 }
1860
1861 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1862 CGroupController c;
1863 int r, unified;
1864
1865 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1866 if (r < 0)
1867 return r;
1868
1869 unified = cg_unified();
1870 if (unified < 0)
1871 return unified;
1872 if (unified > 0)
1873 return 0;
1874
1875 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1876 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1877 const char *p = NULL;
1878
1879 if (!(supported & bit))
1880 continue;
1881
1882 if (path_callback)
1883 p = path_callback(bit, userdata);
1884
1885 if (!p)
1886 p = path;
1887
1888 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1889 }
1890
1891 return 0;
1892 }
1893
1894 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1895 Iterator i;
1896 void *pidp;
1897 int r = 0;
1898
1899 SET_FOREACH(pidp, pids, i) {
1900 pid_t pid = PTR_TO_PID(pidp);
1901 int q;
1902
1903 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1904 if (q < 0 && r >= 0)
1905 r = q;
1906 }
1907
1908 return r;
1909 }
1910
1911 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1912 CGroupController c;
1913 int r = 0, unified;
1914
1915 if (!path_equal(from, to)) {
1916 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1917 if (r < 0)
1918 return r;
1919 }
1920
1921 unified = cg_unified();
1922 if (unified < 0)
1923 return unified;
1924 if (unified > 0)
1925 return r;
1926
1927 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1928 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1929 const char *p = NULL;
1930
1931 if (!(supported & bit))
1932 continue;
1933
1934 if (to_callback)
1935 p = to_callback(bit, userdata);
1936
1937 if (!p)
1938 p = to;
1939
1940 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1941 }
1942
1943 return 0;
1944 }
1945
1946 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1947 CGroupController c;
1948 int r, unified;
1949
1950 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1951 if (r < 0)
1952 return r;
1953
1954 unified = cg_unified();
1955 if (unified < 0)
1956 return unified;
1957 if (unified > 0)
1958 return r;
1959
1960 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1961 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1962
1963 if (!(supported & bit))
1964 continue;
1965
1966 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
1967 }
1968
1969 return 0;
1970 }
1971
1972 int cg_mask_supported(CGroupMask *ret) {
1973 CGroupMask mask = 0;
1974 int r, unified;
1975
1976 /* Determines the mask of supported cgroup controllers. Only
1977 * includes controllers we can make sense of and that are
1978 * actually accessible. */
1979
1980 unified = cg_unified();
1981 if (unified < 0)
1982 return unified;
1983 if (unified > 0) {
1984 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
1985 const char *c;
1986
1987 /* In the unified hierarchy we can read the supported
1988 * and accessible controllers from a the top-level
1989 * cgroup attribute */
1990
1991 r = cg_get_root_path(&root);
1992 if (r < 0)
1993 return r;
1994
1995 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
1996 if (r < 0)
1997 return r;
1998
1999 r = read_one_line_file(path, &controllers);
2000 if (r < 0)
2001 return r;
2002
2003 c = controllers;
2004 for (;;) {
2005 _cleanup_free_ char *n = NULL;
2006 CGroupController v;
2007
2008 r = extract_first_word(&c, &n, NULL, 0);
2009 if (r < 0)
2010 return r;
2011 if (r == 0)
2012 break;
2013
2014 v = cgroup_controller_from_string(n);
2015 if (v < 0)
2016 continue;
2017
2018 mask |= CGROUP_CONTROLLER_TO_MASK(v);
2019 }
2020
2021 /* Currently, we only support the memory and pids
2022 * controller in the unified hierarchy, mask
2023 * everything else off. */
2024 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_PIDS;
2025
2026 } else {
2027 CGroupController c;
2028
2029 /* In the legacy hierarchy, we check whether which
2030 * hierarchies are mounted. */
2031
2032 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2033 const char *n;
2034
2035 n = cgroup_controller_to_string(c);
2036 if (controller_is_accessible(n) >= 0)
2037 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2038 }
2039 }
2040
2041 *ret = mask;
2042 return 0;
2043 }
2044
2045 int cg_kernel_controllers(Set *controllers) {
2046 _cleanup_fclose_ FILE *f = NULL;
2047 char buf[LINE_MAX];
2048 int r;
2049
2050 assert(controllers);
2051
2052 /* Determines the full list of kernel-known controllers. Might
2053 * include controllers we don't actually support, arbitrary
2054 * named hierarchies and controllers that aren't currently
2055 * accessible (because not mounted). */
2056
2057 f = fopen("/proc/cgroups", "re");
2058 if (!f) {
2059 if (errno == ENOENT)
2060 return 0;
2061 return -errno;
2062 }
2063
2064 /* Ignore the header line */
2065 (void) fgets(buf, sizeof(buf), f);
2066
2067 for (;;) {
2068 char *controller;
2069 int enabled = 0;
2070
2071 errno = 0;
2072 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2073
2074 if (feof(f))
2075 break;
2076
2077 if (ferror(f) && errno != 0)
2078 return -errno;
2079
2080 return -EBADMSG;
2081 }
2082
2083 if (!enabled) {
2084 free(controller);
2085 continue;
2086 }
2087
2088 if (!cg_controller_is_valid(controller)) {
2089 free(controller);
2090 return -EBADMSG;
2091 }
2092
2093 r = set_consume(controllers, controller);
2094 if (r < 0)
2095 return r;
2096 }
2097
2098 return 0;
2099 }
2100
2101 static thread_local int unified_cache = -1;
2102
2103 int cg_unified(void) {
2104 struct statfs fs;
2105
2106 /* Checks if we support the unified hierarchy. Returns an
2107 * error when the cgroup hierarchies aren't mounted yet or we
2108 * have any other trouble determining if the unified hierarchy
2109 * is supported. */
2110
2111 if (unified_cache >= 0)
2112 return unified_cache;
2113
2114 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2115 return -errno;
2116
2117 if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2118 unified_cache = true;
2119 else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2120 unified_cache = false;
2121 else
2122 return -ENOEXEC;
2123
2124 return unified_cache;
2125 }
2126
2127 void cg_unified_flush(void) {
2128 unified_cache = -1;
2129 }
2130
2131 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2132 _cleanup_free_ char *fs = NULL;
2133 CGroupController c;
2134 int r, unified;
2135
2136 assert(p);
2137
2138 if (supported == 0)
2139 return 0;
2140
2141 unified = cg_unified();
2142 if (unified < 0)
2143 return unified;
2144 if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2145 return 0;
2146
2147 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2148 if (r < 0)
2149 return r;
2150
2151 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2152 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2153 const char *n;
2154
2155 if (!(supported & bit))
2156 continue;
2157
2158 n = cgroup_controller_to_string(c);
2159 {
2160 char s[1 + strlen(n) + 1];
2161
2162 s[0] = mask & bit ? '+' : '-';
2163 strcpy(s + 1, n);
2164
2165 r = write_string_file(fs, s, 0);
2166 if (r < 0)
2167 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2168 }
2169 }
2170
2171 return 0;
2172 }
2173
2174 bool cg_is_unified_wanted(void) {
2175 static thread_local int wanted = -1;
2176 int r, unified;
2177
2178 /* If the hierarchy is already mounted, then follow whatever
2179 * was chosen for it. */
2180 unified = cg_unified();
2181 if (unified >= 0)
2182 return unified;
2183
2184 /* Otherwise, let's see what the kernel command line has to
2185 * say. Since checking that is expensive, let's cache the
2186 * result. */
2187 if (wanted >= 0)
2188 return wanted;
2189
2190 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2191 if (r > 0)
2192 return (wanted = true);
2193 else {
2194 _cleanup_free_ char *value = NULL;
2195
2196 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2197 if (r < 0)
2198 return false;
2199 if (r == 0)
2200 return (wanted = false);
2201
2202 return (wanted = parse_boolean(value) > 0);
2203 }
2204 }
2205
/* The legacy hierarchy is wanted exactly when the unified one is not. */
bool cg_is_legacy_wanted(void) {
        bool unified_wanted = cg_is_unified_wanted();

        return !unified_wanted;
}
2209
2210 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2211 uint64_t u;
2212 int r;
2213
2214 if (isempty(s)) {
2215 *ret = CGROUP_CPU_SHARES_INVALID;
2216 return 0;
2217 }
2218
2219 r = safe_atou64(s, &u);
2220 if (r < 0)
2221 return r;
2222
2223 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2224 return -ERANGE;
2225
2226 *ret = u;
2227 return 0;
2228 }
2229
2230 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2231 uint64_t u;
2232 int r;
2233
2234 if (isempty(s)) {
2235 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2236 return 0;
2237 }
2238
2239 r = safe_atou64(s, &u);
2240 if (r < 0)
2241 return r;
2242
2243 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2244 return -ERANGE;
2245
2246 *ret = u;
2247 return 0;
2248 }
2249
2250 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2251 [CGROUP_CONTROLLER_CPU] = "cpu",
2252 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2253 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2254 [CGROUP_CONTROLLER_MEMORY] = "memory",
2255 [CGROUP_CONTROLLER_DEVICES] = "devices",
2256 [CGROUP_CONTROLLER_PIDS] = "pids",
2257 [CGROUP_CONTROLLER_NET_CLS] = "net_cls",
2258 };
2259
2260 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);