]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/cgroup-util.c
sd-event: instrument sd_event_run() for profiling delays
[thirdparty/systemd.git] / src / basic / cgroup-util.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <dirent.h>
23 #include <errno.h>
24 #include <ftw.h>
25 #include <limits.h>
26 #include <signal.h>
27 #include <stddef.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/stat.h>
31 #include <sys/statfs.h>
32 #include <sys/types.h>
33 #include <unistd.h>
34
35 #include "alloc-util.h"
36 #include "cgroup-util.h"
37 #include "def.h"
38 #include "dirent-util.h"
39 #include "extract-word.h"
40 #include "fd-util.h"
41 #include "fileio.h"
42 #include "formats-util.h"
43 #include "fs-util.h"
44 #include "log.h"
45 #include "login-util.h"
46 #include "macro.h"
47 #include "missing.h"
48 #include "mkdir.h"
49 #include "parse-util.h"
50 #include "path-util.h"
51 #include "proc-cmdline.h"
52 #include "process-util.h"
53 #include "set.h"
54 #include "special.h"
55 #include "stat-util.h"
56 #include "string-table.h"
57 #include "string-util.h"
58 #include "unit-name.h"
59 #include "user-util.h"
60
61 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
62 _cleanup_free_ char *fs = NULL;
63 FILE *f;
64 int r;
65
66 assert(_f);
67
68 r = cg_get_path(controller, path, "cgroup.procs", &fs);
69 if (r < 0)
70 return r;
71
72 f = fopen(fs, "re");
73 if (!f)
74 return -errno;
75
76 *_f = f;
77 return 0;
78 }
79
80 int cg_read_pid(FILE *f, pid_t *_pid) {
81 unsigned long ul;
82
83 /* Note that the cgroup.procs might contain duplicates! See
84 * cgroups.txt for details. */
85
86 assert(f);
87 assert(_pid);
88
89 errno = 0;
90 if (fscanf(f, "%lu", &ul) != 1) {
91
92 if (feof(f))
93 return 0;
94
95 return errno ? -errno : -EIO;
96 }
97
98 if (ul <= 0)
99 return -EIO;
100
101 *_pid = (pid_t) ul;
102 return 1;
103 }
104
105 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
106 _cleanup_free_ char *fs = NULL;
107 int r;
108 DIR *d;
109
110 assert(_d);
111
112 /* This is not recursive! */
113
114 r = cg_get_path(controller, path, NULL, &fs);
115 if (r < 0)
116 return r;
117
118 d = opendir(fs);
119 if (!d)
120 return -errno;
121
122 *_d = d;
123 return 0;
124 }
125
126 int cg_read_subgroup(DIR *d, char **fn) {
127 struct dirent *de;
128
129 assert(d);
130 assert(fn);
131
132 FOREACH_DIRENT_ALL(de, d, return -errno) {
133 char *b;
134
135 if (de->d_type != DT_DIR)
136 continue;
137
138 if (streq(de->d_name, ".") ||
139 streq(de->d_name, ".."))
140 continue;
141
142 b = strdup(de->d_name);
143 if (!b)
144 return -ENOMEM;
145
146 *fn = b;
147 return 1;
148 }
149
150 return 0;
151 }
152
153 int cg_rmdir(const char *controller, const char *path) {
154 _cleanup_free_ char *p = NULL;
155 int r;
156
157 r = cg_get_path(controller, path, NULL, &p);
158 if (r < 0)
159 return r;
160
161 r = rmdir(p);
162 if (r < 0 && errno != ENOENT)
163 return -errno;
164
165 return 0;
166 }
167
168 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
169 _cleanup_set_free_ Set *allocated_set = NULL;
170 bool done = false;
171 int r, ret = 0;
172 pid_t my_pid;
173
174 assert(sig >= 0);
175
176 /* This goes through the tasks list and kills them all. This
177 * is repeated until no further processes are added to the
178 * tasks list, to properly handle forking processes */
179
180 if (!s) {
181 s = allocated_set = set_new(NULL);
182 if (!s)
183 return -ENOMEM;
184 }
185
186 my_pid = getpid();
187
188 do {
189 _cleanup_fclose_ FILE *f = NULL;
190 pid_t pid = 0;
191 done = true;
192
193 r = cg_enumerate_processes(controller, path, &f);
194 if (r < 0) {
195 if (ret >= 0 && r != -ENOENT)
196 return r;
197
198 return ret;
199 }
200
201 while ((r = cg_read_pid(f, &pid)) > 0) {
202
203 if (ignore_self && pid == my_pid)
204 continue;
205
206 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
207 continue;
208
209 /* If we haven't killed this process yet, kill
210 * it */
211 if (kill(pid, sig) < 0) {
212 if (ret >= 0 && errno != ESRCH)
213 ret = -errno;
214 } else {
215 if (sigcont && sig != SIGKILL)
216 (void) kill(pid, SIGCONT);
217
218 if (ret == 0)
219 ret = 1;
220 }
221
222 done = false;
223
224 r = set_put(s, PID_TO_PTR(pid));
225 if (r < 0) {
226 if (ret >= 0)
227 return r;
228
229 return ret;
230 }
231 }
232
233 if (r < 0) {
234 if (ret >= 0)
235 return r;
236
237 return ret;
238 }
239
240 /* To avoid racing against processes which fork
241 * quicker than we can kill them we repeat this until
242 * no new pids need to be killed. */
243
244 } while (!done);
245
246 return ret;
247 }
248
249 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
250 _cleanup_set_free_ Set *allocated_set = NULL;
251 _cleanup_closedir_ DIR *d = NULL;
252 int r, ret;
253 char *fn;
254
255 assert(path);
256 assert(sig >= 0);
257
258 if (!s) {
259 s = allocated_set = set_new(NULL);
260 if (!s)
261 return -ENOMEM;
262 }
263
264 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
265
266 r = cg_enumerate_subgroups(controller, path, &d);
267 if (r < 0) {
268 if (ret >= 0 && r != -ENOENT)
269 return r;
270
271 return ret;
272 }
273
274 while ((r = cg_read_subgroup(d, &fn)) > 0) {
275 _cleanup_free_ char *p = NULL;
276
277 p = strjoin(path, "/", fn, NULL);
278 free(fn);
279 if (!p)
280 return -ENOMEM;
281
282 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
283 if (r != 0 && ret >= 0)
284 ret = r;
285 }
286
287 if (ret >= 0 && r < 0)
288 ret = r;
289
290 if (rem) {
291 r = cg_rmdir(controller, path);
292 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
293 return r;
294 }
295
296 return ret;
297 }
298
299 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
300 bool done = false;
301 _cleanup_set_free_ Set *s = NULL;
302 int r, ret = 0;
303 pid_t my_pid;
304
305 assert(cfrom);
306 assert(pfrom);
307 assert(cto);
308 assert(pto);
309
310 s = set_new(NULL);
311 if (!s)
312 return -ENOMEM;
313
314 my_pid = getpid();
315
316 do {
317 _cleanup_fclose_ FILE *f = NULL;
318 pid_t pid = 0;
319 done = true;
320
321 r = cg_enumerate_processes(cfrom, pfrom, &f);
322 if (r < 0) {
323 if (ret >= 0 && r != -ENOENT)
324 return r;
325
326 return ret;
327 }
328
329 while ((r = cg_read_pid(f, &pid)) > 0) {
330
331 /* This might do weird stuff if we aren't a
332 * single-threaded program. However, we
333 * luckily know we are not */
334 if (ignore_self && pid == my_pid)
335 continue;
336
337 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
338 continue;
339
340 /* Ignore kernel threads. Since they can only
341 * exist in the root cgroup, we only check for
342 * them there. */
343 if (cfrom &&
344 (isempty(pfrom) || path_equal(pfrom, "/")) &&
345 is_kernel_thread(pid) > 0)
346 continue;
347
348 r = cg_attach(cto, pto, pid);
349 if (r < 0) {
350 if (ret >= 0 && r != -ESRCH)
351 ret = r;
352 } else if (ret == 0)
353 ret = 1;
354
355 done = false;
356
357 r = set_put(s, PID_TO_PTR(pid));
358 if (r < 0) {
359 if (ret >= 0)
360 return r;
361
362 return ret;
363 }
364 }
365
366 if (r < 0) {
367 if (ret >= 0)
368 return r;
369
370 return ret;
371 }
372 } while (!done);
373
374 return ret;
375 }
376
377 int cg_migrate_recursive(
378 const char *cfrom,
379 const char *pfrom,
380 const char *cto,
381 const char *pto,
382 bool ignore_self,
383 bool rem) {
384
385 _cleanup_closedir_ DIR *d = NULL;
386 int r, ret = 0;
387 char *fn;
388
389 assert(cfrom);
390 assert(pfrom);
391 assert(cto);
392 assert(pto);
393
394 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
395
396 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
397 if (r < 0) {
398 if (ret >= 0 && r != -ENOENT)
399 return r;
400
401 return ret;
402 }
403
404 while ((r = cg_read_subgroup(d, &fn)) > 0) {
405 _cleanup_free_ char *p = NULL;
406
407 p = strjoin(pfrom, "/", fn, NULL);
408 free(fn);
409 if (!p)
410 return -ENOMEM;
411
412 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
413 if (r != 0 && ret >= 0)
414 ret = r;
415 }
416
417 if (r < 0 && ret >= 0)
418 ret = r;
419
420 if (rem) {
421 r = cg_rmdir(cfrom, pfrom);
422 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
423 return r;
424 }
425
426 return ret;
427 }
428
429 int cg_migrate_recursive_fallback(
430 const char *cfrom,
431 const char *pfrom,
432 const char *cto,
433 const char *pto,
434 bool ignore_self,
435 bool rem) {
436
437 int r;
438
439 assert(cfrom);
440 assert(pfrom);
441 assert(cto);
442 assert(pto);
443
444 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
445 if (r < 0) {
446 char prefix[strlen(pto) + 1];
447
448 /* This didn't work? Then let's try all prefixes of the destination */
449
450 PATH_FOREACH_PREFIX(prefix, pto) {
451 int q;
452
453 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
454 if (q >= 0)
455 return q;
456 }
457 }
458
459 return r;
460 }
461
/* Maps a controller name to the directory name used for it below
 * /sys/fs/cgroup/. Effectively this just cuts off the "name=" prefix used
 * for named hierarchies, if present; any other name is returned as is. */
static const char *controller_to_dirname(const char *controller) {
        const char *stripped;

        assert(controller);

        stripped = startswith(controller, "name=");

        return stripped ? stripped : controller;
}
478
479 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
480 const char *dn;
481 char *t = NULL;
482
483 assert(fs);
484 assert(controller);
485
486 dn = controller_to_dirname(controller);
487
488 if (isempty(path) && isempty(suffix))
489 t = strappend("/sys/fs/cgroup/", dn);
490 else if (isempty(path))
491 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
492 else if (isempty(suffix))
493 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
494 else
495 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
496 if (!t)
497 return -ENOMEM;
498
499 *fs = t;
500 return 0;
501 }
502
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]", leaving out the components
 * that are empty. Stores the newly allocated string in *fs and returns 0,
 * or returns -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        bool no_path, no_suffix;
        char *joined;

        assert(fs);

        no_path = isempty(path);
        no_suffix = isempty(suffix);

        if (no_path && no_suffix)
                joined = strdup("/sys/fs/cgroup");
        else if (no_path)
                joined = strappend("/sys/fs/cgroup/", suffix);
        else if (no_suffix)
                joined = strappend("/sys/fs/cgroup/", path);
        else
                joined = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
522
/* Computes the file system path for a cgroup (and optionally one of its
 * attribute files, given as suffix).
 *
 * Without a controller, only the part *below* the controllers is returned,
 * i.e. path and suffix joined without any prefix; at least one of the two
 * must be given or -EINVAL is returned. With a controller, the name is
 * validated and the path is built for the unified or the legacy hierarchy,
 * depending on what cg_unified() reports.
 *
 * Returns 0 and a newly allocated string in *fs on success, a negative
 * errno-style code otherwise. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int unified, r;

        assert(fs);

        if (!controller) {
                char *joined;

                if (path && suffix)
                        joined = strjoin(path, "/", suffix, NULL);
                else if (path)
                        joined = strdup(path);
                else if (suffix)
                        joined = strdup(suffix);
                else
                        return -EINVAL;

                if (!joined)
                        return -ENOMEM;

                *fs = path_kill_slashes(joined);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        unified = cg_unified();
        if (unified < 0)
                return unified;

        r = unified > 0
                ? join_path_unified(path, suffix, fs)
                : join_path_legacy(controller, path, suffix, fs);
        if (r < 0)
                return r;

        path_kill_slashes(*fs);
        return 0;
}
567
568 static int controller_is_accessible(const char *controller) {
569 int unified;
570
571 assert(controller);
572
573 /* Checks whether a specific controller is accessible,
574 * i.e. its hierarchy mounted. In the unified hierarchy all
575 * controllers are considered accessible, except for the named
576 * hierarchies */
577
578 if (!cg_controller_is_valid(controller))
579 return -EINVAL;
580
581 unified = cg_unified();
582 if (unified < 0)
583 return unified;
584 if (unified > 0) {
585 /* We don't support named hierarchies if we are using
586 * the unified hierarchy. */
587
588 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
589 return 0;
590
591 if (startswith(controller, "name="))
592 return -EOPNOTSUPP;
593
594 } else {
595 const char *cc, *dn;
596
597 dn = controller_to_dirname(controller);
598 cc = strjoina("/sys/fs/cgroup/", dn);
599
600 if (laccess(cc, F_OK) < 0)
601 return -errno;
602 }
603
604 return 0;
605 }
606
/* Same as cg_get_path(), but first verifies that the specified controller
 * is actually accessible and fails with that check's error if it is not. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
620
621 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
622 assert(path);
623 assert(sb);
624 assert(ftwbuf);
625
626 if (typeflag != FTW_DP)
627 return 0;
628
629 if (ftwbuf->level < 1)
630 return 0;
631
632 (void) rmdir(path);
633 return 0;
634 }
635
636 int cg_trim(const char *controller, const char *path, bool delete_root) {
637 _cleanup_free_ char *fs = NULL;
638 int r = 0;
639
640 assert(path);
641
642 r = cg_get_path(controller, path, NULL, &fs);
643 if (r < 0)
644 return r;
645
646 errno = 0;
647 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
648 if (errno == ENOENT)
649 r = 0;
650 else if (errno != 0)
651 r = -errno;
652 else
653 r = -EIO;
654 }
655
656 if (delete_root) {
657 if (rmdir(fs) < 0 && errno != ENOENT)
658 return -errno;
659 }
660
661 return r;
662 }
663
664 int cg_create(const char *controller, const char *path) {
665 _cleanup_free_ char *fs = NULL;
666 int r;
667
668 r = cg_get_path_and_check(controller, path, NULL, &fs);
669 if (r < 0)
670 return r;
671
672 r = mkdir_parents(fs, 0755);
673 if (r < 0)
674 return r;
675
676 if (mkdir(fs, 0755) < 0) {
677
678 if (errno == EEXIST)
679 return 0;
680
681 return -errno;
682 }
683
684 return 1;
685 }
686
/* Creates the cgroup and then attaches pid to it. On success the return
 * value of cg_create() is passed through; on failure the error of the
 * failing step is returned. This does not remove the cgroup again if
 * attaching fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);
        if (attached < 0)
                return attached;

        return created;
}
703
704 int cg_attach(const char *controller, const char *path, pid_t pid) {
705 _cleanup_free_ char *fs = NULL;
706 char c[DECIMAL_STR_MAX(pid_t) + 2];
707 int r;
708
709 assert(path);
710 assert(pid >= 0);
711
712 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
713 if (r < 0)
714 return r;
715
716 if (pid == 0)
717 pid = getpid();
718
719 snprintf(c, sizeof(c), PID_FMT"\n", pid);
720
721 return write_string_file(fs, c, 0);
722 }
723
724 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
725 int r;
726
727 assert(controller);
728 assert(path);
729 assert(pid >= 0);
730
731 r = cg_attach(controller, path, pid);
732 if (r < 0) {
733 char prefix[strlen(path) + 1];
734
735 /* This didn't work? Then let's try all prefixes of
736 * the destination */
737
738 PATH_FOREACH_PREFIX(prefix, path) {
739 int q;
740
741 q = cg_attach(controller, prefix, pid);
742 if (q >= 0)
743 return q;
744 }
745 }
746
747 return r;
748 }
749
750 int cg_set_group_access(
751 const char *controller,
752 const char *path,
753 mode_t mode,
754 uid_t uid,
755 gid_t gid) {
756
757 _cleanup_free_ char *fs = NULL;
758 int r;
759
760 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
761 return 0;
762
763 if (mode != MODE_INVALID)
764 mode &= 0777;
765
766 r = cg_get_path(controller, path, NULL, &fs);
767 if (r < 0)
768 return r;
769
770 return chmod_and_chown(fs, mode, uid, gid);
771 }
772
773 int cg_set_task_access(
774 const char *controller,
775 const char *path,
776 mode_t mode,
777 uid_t uid,
778 gid_t gid) {
779
780 _cleanup_free_ char *fs = NULL, *procs = NULL;
781 int r, unified;
782
783 assert(path);
784
785 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
786 return 0;
787
788 if (mode != MODE_INVALID)
789 mode &= 0666;
790
791 r = cg_get_path(controller, path, "cgroup.procs", &fs);
792 if (r < 0)
793 return r;
794
795 r = chmod_and_chown(fs, mode, uid, gid);
796 if (r < 0)
797 return r;
798
799 unified = cg_unified();
800 if (unified < 0)
801 return unified;
802 if (unified)
803 return 0;
804
805 /* Compatibility, Always keep values for "tasks" in sync with
806 * "cgroup.procs" */
807 if (cg_get_path(controller, path, "tasks", &procs) >= 0)
808 (void) chmod_and_chown(procs, mode, uid, gid);
809
810 return 0;
811 }
812
813 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
814 _cleanup_fclose_ FILE *f = NULL;
815 char line[LINE_MAX];
816 const char *fs;
817 size_t cs = 0;
818 int unified;
819
820 assert(path);
821 assert(pid >= 0);
822
823 unified = cg_unified();
824 if (unified < 0)
825 return unified;
826 if (unified == 0) {
827 if (controller) {
828 if (!cg_controller_is_valid(controller))
829 return -EINVAL;
830 } else
831 controller = SYSTEMD_CGROUP_CONTROLLER;
832
833 cs = strlen(controller);
834 }
835
836 fs = procfs_file_alloca(pid, "cgroup");
837 f = fopen(fs, "re");
838 if (!f)
839 return errno == ENOENT ? -ESRCH : -errno;
840
841 FOREACH_LINE(line, f, return -errno) {
842 char *e, *p;
843
844 truncate_nl(line);
845
846 if (unified) {
847 e = startswith(line, "0:");
848 if (!e)
849 continue;
850
851 e = strchr(e, ':');
852 if (!e)
853 continue;
854 } else {
855 char *l;
856 size_t k;
857 const char *word, *state;
858 bool found = false;
859
860 l = strchr(line, ':');
861 if (!l)
862 continue;
863
864 l++;
865 e = strchr(l, ':');
866 if (!e)
867 continue;
868
869 *e = 0;
870 FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
871 if (k == cs && memcmp(word, controller, cs) == 0) {
872 found = true;
873 break;
874 }
875 }
876
877 if (!found)
878 continue;
879 }
880
881 p = strdup(e + 1);
882 if (!p)
883 return -ENOMEM;
884
885 *path = p;
886 return 0;
887 }
888
889 return -ENODATA;
890 }
891
892 int cg_install_release_agent(const char *controller, const char *agent) {
893 _cleanup_free_ char *fs = NULL, *contents = NULL;
894 const char *sc;
895 int r, unified;
896
897 assert(agent);
898
899 unified = cg_unified();
900 if (unified < 0)
901 return unified;
902 if (unified) /* doesn't apply to unified hierarchy */
903 return -EOPNOTSUPP;
904
905 r = cg_get_path(controller, NULL, "release_agent", &fs);
906 if (r < 0)
907 return r;
908
909 r = read_one_line_file(fs, &contents);
910 if (r < 0)
911 return r;
912
913 sc = strstrip(contents);
914 if (isempty(sc)) {
915 r = write_string_file(fs, agent, 0);
916 if (r < 0)
917 return r;
918 } else if (!path_equal(sc, agent))
919 return -EEXIST;
920
921 fs = mfree(fs);
922 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
923 if (r < 0)
924 return r;
925
926 contents = mfree(contents);
927 r = read_one_line_file(fs, &contents);
928 if (r < 0)
929 return r;
930
931 sc = strstrip(contents);
932 if (streq(sc, "0")) {
933 r = write_string_file(fs, "1", 0);
934 if (r < 0)
935 return r;
936
937 return 1;
938 }
939
940 if (!streq(sc, "1"))
941 return -EIO;
942
943 return 0;
944 }
945
946 int cg_uninstall_release_agent(const char *controller) {
947 _cleanup_free_ char *fs = NULL;
948 int r, unified;
949
950 unified = cg_unified();
951 if (unified < 0)
952 return unified;
953 if (unified) /* Doesn't apply to unified hierarchy */
954 return -EOPNOTSUPP;
955
956 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
957 if (r < 0)
958 return r;
959
960 r = write_string_file(fs, "0", 0);
961 if (r < 0)
962 return r;
963
964 fs = mfree(fs);
965
966 r = cg_get_path(controller, NULL, "release_agent", &fs);
967 if (r < 0)
968 return r;
969
970 r = write_string_file(fs, "", 0);
971 if (r < 0)
972 return r;
973
974 return 0;
975 }
976
977 int cg_is_empty(const char *controller, const char *path) {
978 _cleanup_fclose_ FILE *f = NULL;
979 pid_t pid;
980 int r;
981
982 assert(path);
983
984 r = cg_enumerate_processes(controller, path, &f);
985 if (r == -ENOENT)
986 return 1;
987 if (r < 0)
988 return r;
989
990 r = cg_read_pid(f, &pid);
991 if (r < 0)
992 return r;
993
994 return r == 0;
995 }
996
997 int cg_is_empty_recursive(const char *controller, const char *path) {
998 int unified, r;
999
1000 assert(path);
1001
1002 /* The root cgroup is always populated */
1003 if (controller && (isempty(path) || path_equal(path, "/")))
1004 return false;
1005
1006 unified = cg_unified();
1007 if (unified < 0)
1008 return unified;
1009
1010 if (unified > 0) {
1011 _cleanup_free_ char *populated = NULL, *t = NULL;
1012
1013 /* On the unified hierarchy we can check empty state
1014 * via the "cgroup.populated" attribute. */
1015
1016 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1017 if (r < 0)
1018 return r;
1019
1020 r = read_one_line_file(populated, &t);
1021 if (r == -ENOENT)
1022 return 1;
1023 if (r < 0)
1024 return r;
1025
1026 return streq(t, "0");
1027 } else {
1028 _cleanup_closedir_ DIR *d = NULL;
1029 char *fn;
1030
1031 r = cg_is_empty(controller, path);
1032 if (r <= 0)
1033 return r;
1034
1035 r = cg_enumerate_subgroups(controller, path, &d);
1036 if (r == -ENOENT)
1037 return 1;
1038 if (r < 0)
1039 return r;
1040
1041 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1042 _cleanup_free_ char *p = NULL;
1043
1044 p = strjoin(path, "/", fn, NULL);
1045 free(fn);
1046 if (!p)
1047 return -ENOMEM;
1048
1049 r = cg_is_empty_recursive(controller, p);
1050 if (r <= 0)
1051 return r;
1052 }
1053 if (r < 0)
1054 return r;
1055
1056 return true;
1057 }
1058 }
1059
/* Splits a cgroup specification into controller and path. Three forms are
 * understood:
 *
 *   "/some/path"       -> controller NULL, path set
 *   "controller"       -> controller set, path NULL
 *   "controller:/path" -> both set (path NULL if nothing follows the colon)
 *
 * Either output pointer may be NULL if the caller is not interested in
 * that part. Returned strings are newly allocated. Returns 0 on success,
 * -EINVAL if the controller or path fails validation, -ENOMEM on
 * allocation failure. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *ctrl = NULL, *sub = NULL;
        const char *colon;

        assert(spec);

        /* Path only */
        if (*spec == '/') {
                if (!path_is_safe(spec))
                        return -EINVAL;

                if (path) {
                        sub = strdup(spec);
                        if (!sub)
                                return -ENOMEM;

                        *path = path_kill_slashes(sub);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        colon = strchr(spec, ':');

        /* Controller only */
        if (!colon) {
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        ctrl = strdup(spec);
                        if (!ctrl)
                                return -ENOMEM;

                        *controller = ctrl;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Controller, colon, optional path */
        ctrl = strndup(spec, colon - spec);
        if (!ctrl)
                return -ENOMEM;
        if (!cg_controller_is_valid(ctrl)) {
                free(ctrl);
                return -EINVAL;
        }

        if (!isempty(colon + 1)) {
                sub = strdup(colon + 1);
                if (!sub) {
                        free(ctrl);
                        return -ENOMEM;
                }

                if (!path_is_safe(sub) ||
                    !path_is_absolute(sub)) {
                        free(ctrl);
                        free(sub);
                        return -EINVAL;
                }

                path_kill_slashes(sub);
        }

        /* Hand over what the caller asked for, release the rest */
        if (controller)
                *controller = ctrl;
        else
                free(ctrl);

        if (path)
                *path = sub;
        else
                free(sub);

        return 0;
}
1142
1143 int cg_mangle_path(const char *path, char **result) {
1144 _cleanup_free_ char *c = NULL, *p = NULL;
1145 char *t;
1146 int r;
1147
1148 assert(path);
1149 assert(result);
1150
1151 /* First, check if it already is a filesystem path */
1152 if (path_startswith(path, "/sys/fs/cgroup")) {
1153
1154 t = strdup(path);
1155 if (!t)
1156 return -ENOMEM;
1157
1158 *result = path_kill_slashes(t);
1159 return 0;
1160 }
1161
1162 /* Otherwise, treat it as cg spec */
1163 r = cg_split_spec(path, &c, &p);
1164 if (r < 0)
1165 return r;
1166
1167 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1168 }
1169
1170 int cg_get_root_path(char **path) {
1171 char *p, *e;
1172 int r;
1173
1174 assert(path);
1175
1176 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1177 if (r < 0)
1178 return r;
1179
1180 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1181 if (!e)
1182 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1183 if (!e)
1184 e = endswith(p, "/system"); /* even more legacy */
1185 if (e)
1186 *e = 0;
1187
1188 *path = p;
1189 return 0;
1190 }
1191
1192 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1193 _cleanup_free_ char *rt = NULL;
1194 char *p;
1195 int r;
1196
1197 assert(cgroup);
1198 assert(shifted);
1199
1200 if (!root) {
1201 /* If the root was specified let's use that, otherwise
1202 * let's determine it from PID 1 */
1203
1204 r = cg_get_root_path(&rt);
1205 if (r < 0)
1206 return r;
1207
1208 root = rt;
1209 }
1210
1211 p = path_startswith(cgroup, root);
1212 if (p && p > cgroup)
1213 *shifted = p - 1;
1214 else
1215 *shifted = cgroup;
1216
1217 return 0;
1218 }
1219
1220 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1221 _cleanup_free_ char *raw = NULL;
1222 const char *c;
1223 int r;
1224
1225 assert(pid >= 0);
1226 assert(cgroup);
1227
1228 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1229 if (r < 0)
1230 return r;
1231
1232 r = cg_shift_path(raw, root, &c);
1233 if (r < 0)
1234 return r;
1235
1236 if (c == raw) {
1237 *cgroup = raw;
1238 raw = NULL;
1239 } else {
1240 char *n;
1241
1242 n = strdup(c);
1243 if (!n)
1244 return -ENOMEM;
1245
1246 *cgroup = n;
1247 }
1248
1249 return 0;
1250 }
1251
1252 int cg_path_decode_unit(const char *cgroup, char **unit){
1253 char *c, *s;
1254 size_t n;
1255
1256 assert(cgroup);
1257 assert(unit);
1258
1259 n = strcspn(cgroup, "/");
1260 if (n < 3)
1261 return -ENXIO;
1262
1263 c = strndupa(cgroup, n);
1264 c = cg_unescape(c);
1265
1266 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1267 return -ENXIO;
1268
1269 s = strdup(c);
1270 if (!s)
1271 return -ENOMEM;
1272
1273 *unit = s;
1274 return 0;
1275 }
1276
1277 static bool valid_slice_name(const char *p, size_t n) {
1278
1279 if (!p)
1280 return false;
1281
1282 if (n < strlen("x.slice"))
1283 return false;
1284
1285 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1286 char buf[n+1], *c;
1287
1288 memcpy(buf, p, n);
1289 buf[n] = 0;
1290
1291 c = cg_unescape(buf);
1292
1293 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1294 }
1295
1296 return false;
1297 }
1298
/* Skips over all leading slice components of a cgroup path. Returns a
 * pointer to the first path component that is not a valid slice name,
 * with the slashes in front of it skipped as well. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t len;

                component = p + strspn(p, "/");
                len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                p = component + len;
        }
}
1316
/* Determines the unit a cgroup path belongs to: leading slices are
 * skipped and the next component is decoded as unit name. A result that
 * ends in ".slice" is rejected with -ENXIO. Returns 0 and a newly
 * allocated string in *ret on success, a negative errno-style code
 * otherwise. */
int cg_path_get_unit(const char *path, char **ret) {
        const char *after_slices;
        char *name;
        int r;

        assert(path);
        assert(ret);

        after_slices = skip_slices(path);

        r = cg_path_decode_unit(after_slices, &name);
        if (r < 0)
                return r;

        /* We skipped over the slices, don't accept any now */
        if (endswith(name, ".slice")) {
                free(name);
                return -ENXIO;
        }

        *ret = name;
        return 0;
}
1340
1341 int cg_pid_get_unit(pid_t pid, char **unit) {
1342 _cleanup_free_ char *cgroup = NULL;
1343 int r;
1344
1345 assert(unit);
1346
1347 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1348 if (r < 0)
1349 return r;
1350
1351 return cg_path_get_unit(cgroup, unit);
1352 }
1353
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to what follows the session scope component (leading
 * slashes skipped), or NULL if the path is empty, its first component is
 * not of the form "session-<id>.scope", or <id> is not a valid session id.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                char buf[n - 8 - 6 + 1];

                /* Copy out just the id between prefix and suffix */
                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1390
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the user manager component (leading
 * slashes skipped), or NULL if the path is empty, its first component is
 * not of the form "user@<uid>.service", or <uid> fails parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", strlen("user@")) == 0 &&
            memcmp(p + len - strlen(".service"), ".service", strlen(".service")) == 0) {
                char uid_text[len - strlen("user@") - strlen(".service") + 1];

                /* Copy out just the part between prefix and suffix */
                memcpy(uid_text, p + strlen("user@"), len - strlen("user@") - strlen(".service"));
                uid_text[len - strlen("user@") - strlen(".service")] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own names,
                 * hence we don't need to call cg_unescape() here. */

                if (parse_uid(uid_text, NULL) < 0)
                        return NULL;

                p += len;
                p += strspn(p, "/");

                return p;
        }

        return NULL;
}
1428
/* Skips the part of a cgroup path that leads up to a user unit: any
 * leading slices, followed by either a user manager component or,
 * alternatively, a session scope component. Returns NULL if neither of
 * the two is found after the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Skip the user manager, if it's in the path now... */
        after_manager = skip_user_manager(after_slices);

        /* ...alternatively skip the user session if it is in the path */
        return after_manager ? after_manager : skip_session(after_slices);
}
1445
/* Determines the user unit a cgroup path belongs to. The prefix handled by
 * skip_user_prefix() must be present, otherwise -ENXIO is returned; the
 * remainder is parsed exactly like a system unit path. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);
        if (rest == NULL)
                return -ENXIO;

        /* And from here on it looks pretty much the same as for a system
         * unit, hence let's use the same parser from here on. */
        return cg_path_get_unit(rest, ret);
}
1461
1462 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1463 _cleanup_free_ char *cgroup = NULL;
1464 int r;
1465
1466 assert(unit);
1467
1468 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1469 if (r < 0)
1470 return r;
1471
1472 return cg_path_get_user_unit(cgroup, unit);
1473 }
1474
1475 int cg_path_get_machine_name(const char *path, char **machine) {
1476 _cleanup_free_ char *u = NULL;
1477 const char *sl;
1478 int r;
1479
1480 r = cg_path_get_unit(path, &u);
1481 if (r < 0)
1482 return r;
1483
1484 sl = strjoina("/run/systemd/machines/unit:", u);
1485 return readlink_malloc(sl, machine);
1486 }
1487
1488 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1489 _cleanup_free_ char *cgroup = NULL;
1490 int r;
1491
1492 assert(machine);
1493
1494 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1495 if (r < 0)
1496 return r;
1497
1498 return cg_path_get_machine_name(cgroup, machine);
1499 }
1500
1501 int cg_path_get_session(const char *path, char **session) {
1502 _cleanup_free_ char *unit = NULL;
1503 char *start, *end;
1504 int r;
1505
1506 assert(path);
1507
1508 r = cg_path_get_unit(path, &unit);
1509 if (r < 0)
1510 return r;
1511
1512 start = startswith(unit, "session-");
1513 if (!start)
1514 return -ENXIO;
1515 end = endswith(start, ".scope");
1516 if (!end)
1517 return -ENXIO;
1518
1519 *end = 0;
1520 if (!session_id_valid(start))
1521 return -ENXIO;
1522
1523 if (session) {
1524 char *rr;
1525
1526 rr = strdup(start);
1527 if (!rr)
1528 return -ENOMEM;
1529
1530 *session = rr;
1531 }
1532
1533 return 0;
1534 }
1535
1536 int cg_pid_get_session(pid_t pid, char **session) {
1537 _cleanup_free_ char *cgroup = NULL;
1538 int r;
1539
1540 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1541 if (r < 0)
1542 return r;
1543
1544 return cg_path_get_session(cgroup, session);
1545 }
1546
1547 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1548 _cleanup_free_ char *slice = NULL;
1549 char *start, *end;
1550 int r;
1551
1552 assert(path);
1553
1554 r = cg_path_get_slice(path, &slice);
1555 if (r < 0)
1556 return r;
1557
1558 start = startswith(slice, "user-");
1559 if (!start)
1560 return -ENXIO;
1561 end = endswith(start, ".slice");
1562 if (!end)
1563 return -ENXIO;
1564
1565 *end = 0;
1566 if (parse_uid(start, uid) < 0)
1567 return -ENXIO;
1568
1569 return 0;
1570 }
1571
1572 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1573 _cleanup_free_ char *cgroup = NULL;
1574 int r;
1575
1576 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1577 if (r < 0)
1578 return r;
1579
1580 return cg_path_get_owner_uid(cgroup, uid);
1581 }
1582
/* Determines the slice a cgroup path is located in: the path is walked
 * component by component from the left for as long as the components are
 * valid slice names (per valid_slice_name()), and the last such component
 * is decoded with cg_path_decode_unit() into *slice. If not even the first
 * component is a slice, a newly allocated "-.slice" is returned instead.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or whatever
 * cg_path_decode_unit() returns. */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last_slice = NULL;
        char *root;

        assert(p);
        assert(slice);

        /* Advance over leading slice components, remembering where the
         * most recent one started. */
        for (;;) {
                size_t len;

                p += strspn(p, "/");
                len = strcspn(p, "/");

                if (!valid_slice_name(p, len))
                        break;

                last_slice = p;
                p += len;
        }

        if (last_slice)
                return cg_path_decode_unit(last_slice, slice);

        /* No slice component at all */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1618
1619 int cg_pid_get_slice(pid_t pid, char **slice) {
1620 _cleanup_free_ char *cgroup = NULL;
1621 int r;
1622
1623 assert(slice);
1624
1625 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1626 if (r < 0)
1627 return r;
1628
1629 return cg_path_get_slice(cgroup, slice);
1630 }
1631
/* Determines the slice of a cgroup path located below a user prefix. The
 * prefix is stripped with skip_user_prefix(); whatever remains is handed
 * to cg_path_get_slice(), whose result is returned. Returns -ENXIO if
 * skip_user_prefix() finds no user prefix in the path. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *remainder;

        assert(p);
        assert(slice);

        remainder = skip_user_prefix(p);
        if (remainder)
                /* Past the prefix the layout matches that of a system
                 * slice, so the regular parser can take over. */
                return cg_path_get_slice(remainder, slice);

        return -ENXIO;
}
1645
1646 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1647 _cleanup_free_ char *cgroup = NULL;
1648 int r;
1649
1650 assert(slice);
1651
1652 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1653 if (r < 0)
1654 return r;
1655
1656 return cg_path_get_user_slice(cgroup, slice);
1657 }
1658
1659 char *cg_escape(const char *p) {
1660 bool need_prefix = false;
1661
1662 /* This implements very minimal escaping for names to be used
1663 * as file names in the cgroup tree: any name which might
1664 * conflict with a kernel name or is prefixed with '_' is
1665 * prefixed with a '_'. That way, when reading cgroup names it
1666 * is sufficient to remove a single prefixing underscore if
1667 * there is one. */
1668
1669 /* The return value of this function (unlike cg_unescape())
1670 * needs free()! */
1671
1672 if (p[0] == 0 ||
1673 p[0] == '_' ||
1674 p[0] == '.' ||
1675 streq(p, "notify_on_release") ||
1676 streq(p, "release_agent") ||
1677 streq(p, "tasks") ||
1678 startswith(p, "cgroup."))
1679 need_prefix = true;
1680 else {
1681 const char *dot;
1682
1683 dot = strrchr(p, '.');
1684 if (dot) {
1685 CGroupController c;
1686 size_t l = dot - p;
1687
1688 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1689 const char *n;
1690
1691 n = cgroup_controller_to_string(c);
1692
1693 if (l != strlen(n))
1694 continue;
1695
1696 if (memcmp(p, n, l) != 0)
1697 continue;
1698
1699 need_prefix = true;
1700 break;
1701 }
1702 }
1703 }
1704
1705 if (need_prefix)
1706 return strappend("_", p);
1707
1708 return strdup(p);
1709 }
1710
/* Reverses cg_escape(): skips a single leading '_', if there is one.
 *
 * Unlike the result of cg_escape(), the returned pointer refers into the
 * string that was passed in and must NOT be free()d. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (*p == '_' ? p + 1 : p);
}
1722
1723 #define CONTROLLER_VALID \
1724 DIGITS LETTERS \
1725 "_"
1726
1727 bool cg_controller_is_valid(const char *p) {
1728 const char *t, *s;
1729
1730 if (!p)
1731 return false;
1732
1733 s = startswith(p, "name=");
1734 if (s)
1735 p = s;
1736
1737 if (*p == 0 || *p == '_')
1738 return false;
1739
1740 for (t = p; *t; t++)
1741 if (!strchr(CONTROLLER_VALID, *t))
1742 return false;
1743
1744 if (t - p > FILENAME_MAX)
1745 return false;
1746
1747 return true;
1748 }
1749
1750 int cg_slice_to_path(const char *unit, char **ret) {
1751 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1752 const char *dash;
1753 int r;
1754
1755 assert(unit);
1756 assert(ret);
1757
1758 if (streq(unit, "-.slice")) {
1759 char *x;
1760
1761 x = strdup("");
1762 if (!x)
1763 return -ENOMEM;
1764 *ret = x;
1765 return 0;
1766 }
1767
1768 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1769 return -EINVAL;
1770
1771 if (!endswith(unit, ".slice"))
1772 return -EINVAL;
1773
1774 r = unit_name_to_prefix(unit, &p);
1775 if (r < 0)
1776 return r;
1777
1778 dash = strchr(p, '-');
1779
1780 /* Don't allow initial dashes */
1781 if (dash == p)
1782 return -EINVAL;
1783
1784 while (dash) {
1785 _cleanup_free_ char *escaped = NULL;
1786 char n[dash - p + sizeof(".slice")];
1787
1788 /* Don't allow trailing or double dashes */
1789 if (dash[1] == 0 || dash[1] == '-')
1790 return -EINVAL;
1791
1792 strcpy(stpncpy(n, p, dash - p), ".slice");
1793 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1794 return -EINVAL;
1795
1796 escaped = cg_escape(n);
1797 if (!escaped)
1798 return -ENOMEM;
1799
1800 if (!strextend(&s, escaped, "/", NULL))
1801 return -ENOMEM;
1802
1803 dash = strchr(dash+1, '-');
1804 }
1805
1806 e = cg_escape(unit);
1807 if (!e)
1808 return -ENOMEM;
1809
1810 if (!strextend(&s, e, NULL))
1811 return -ENOMEM;
1812
1813 *ret = s;
1814 s = NULL;
1815
1816 return 0;
1817 }
1818
1819 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1820 _cleanup_free_ char *p = NULL;
1821 int r;
1822
1823 r = cg_get_path(controller, path, attribute, &p);
1824 if (r < 0)
1825 return r;
1826
1827 return write_string_file(p, value, 0);
1828 }
1829
1830 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1831 _cleanup_free_ char *p = NULL;
1832 int r;
1833
1834 r = cg_get_path(controller, path, attribute, &p);
1835 if (r < 0)
1836 return r;
1837
1838 return read_one_line_file(p, ret);
1839 }
1840
1841 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1842 CGroupController c;
1843 int r, unified;
1844
1845 /* This one will create a cgroup in our private tree, but also
1846 * duplicate it in the trees specified in mask, and remove it
1847 * in all others */
1848
1849 /* First create the cgroup in our own hierarchy. */
1850 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1851 if (r < 0)
1852 return r;
1853
1854 /* If we are in the unified hierarchy, we are done now */
1855 unified = cg_unified();
1856 if (unified < 0)
1857 return unified;
1858 if (unified > 0)
1859 return 0;
1860
1861 /* Otherwise, do the same in the other hierarchies */
1862 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1863 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1864 const char *n;
1865
1866 n = cgroup_controller_to_string(c);
1867
1868 if (mask & bit)
1869 (void) cg_create(n, path);
1870 else if (supported & bit)
1871 (void) cg_trim(n, path, true);
1872 }
1873
1874 return 0;
1875 }
1876
1877 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1878 CGroupController c;
1879 int r, unified;
1880
1881 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1882 if (r < 0)
1883 return r;
1884
1885 unified = cg_unified();
1886 if (unified < 0)
1887 return unified;
1888 if (unified > 0)
1889 return 0;
1890
1891 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1892 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1893 const char *p = NULL;
1894
1895 if (!(supported & bit))
1896 continue;
1897
1898 if (path_callback)
1899 p = path_callback(bit, userdata);
1900
1901 if (!p)
1902 p = path;
1903
1904 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1905 }
1906
1907 return 0;
1908 }
1909
1910 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1911 Iterator i;
1912 void *pidp;
1913 int r = 0;
1914
1915 SET_FOREACH(pidp, pids, i) {
1916 pid_t pid = PTR_TO_PID(pidp);
1917 int q;
1918
1919 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1920 if (q < 0 && r >= 0)
1921 r = q;
1922 }
1923
1924 return r;
1925 }
1926
1927 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1928 CGroupController c;
1929 int r = 0, unified;
1930
1931 if (!path_equal(from, to)) {
1932 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1933 if (r < 0)
1934 return r;
1935 }
1936
1937 unified = cg_unified();
1938 if (unified < 0)
1939 return unified;
1940 if (unified > 0)
1941 return r;
1942
1943 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1944 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1945 const char *p = NULL;
1946
1947 if (!(supported & bit))
1948 continue;
1949
1950 if (to_callback)
1951 p = to_callback(bit, userdata);
1952
1953 if (!p)
1954 p = to;
1955
1956 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1957 }
1958
1959 return 0;
1960 }
1961
1962 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1963 CGroupController c;
1964 int r, unified;
1965
1966 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1967 if (r < 0)
1968 return r;
1969
1970 unified = cg_unified();
1971 if (unified < 0)
1972 return unified;
1973 if (unified > 0)
1974 return r;
1975
1976 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1977 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1978
1979 if (!(supported & bit))
1980 continue;
1981
1982 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
1983 }
1984
1985 return 0;
1986 }
1987
1988 int cg_mask_supported(CGroupMask *ret) {
1989 CGroupMask mask = 0;
1990 int r, unified;
1991
1992 /* Determines the mask of supported cgroup controllers. Only
1993 * includes controllers we can make sense of and that are
1994 * actually accessible. */
1995
1996 unified = cg_unified();
1997 if (unified < 0)
1998 return unified;
1999 if (unified > 0) {
2000 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2001 const char *c;
2002
2003 /* In the unified hierarchy we can read the supported
2004 * and accessible controllers from a the top-level
2005 * cgroup attribute */
2006
2007 r = cg_get_root_path(&root);
2008 if (r < 0)
2009 return r;
2010
2011 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2012 if (r < 0)
2013 return r;
2014
2015 r = read_one_line_file(path, &controllers);
2016 if (r < 0)
2017 return r;
2018
2019 c = controllers;
2020 for (;;) {
2021 _cleanup_free_ char *n = NULL;
2022 CGroupController v;
2023
2024 r = extract_first_word(&c, &n, NULL, 0);
2025 if (r < 0)
2026 return r;
2027 if (r == 0)
2028 break;
2029
2030 v = cgroup_controller_from_string(n);
2031 if (v < 0)
2032 continue;
2033
2034 mask |= CGROUP_CONTROLLER_TO_MASK(v);
2035 }
2036
2037 /* Currently, we only support the memory and pids
2038 * controller in the unified hierarchy, mask
2039 * everything else off. */
2040 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_PIDS;
2041
2042 } else {
2043 CGroupController c;
2044
2045 /* In the legacy hierarchy, we check whether which
2046 * hierarchies are mounted. */
2047
2048 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2049 const char *n;
2050
2051 n = cgroup_controller_to_string(c);
2052 if (controller_is_accessible(n) >= 0)
2053 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2054 }
2055 }
2056
2057 *ret = mask;
2058 return 0;
2059 }
2060
2061 int cg_kernel_controllers(Set *controllers) {
2062 _cleanup_fclose_ FILE *f = NULL;
2063 char buf[LINE_MAX];
2064 int r;
2065
2066 assert(controllers);
2067
2068 /* Determines the full list of kernel-known controllers. Might
2069 * include controllers we don't actually support, arbitrary
2070 * named hierarchies and controllers that aren't currently
2071 * accessible (because not mounted). */
2072
2073 f = fopen("/proc/cgroups", "re");
2074 if (!f) {
2075 if (errno == ENOENT)
2076 return 0;
2077 return -errno;
2078 }
2079
2080 /* Ignore the header line */
2081 (void) fgets(buf, sizeof(buf), f);
2082
2083 for (;;) {
2084 char *controller;
2085 int enabled = 0;
2086
2087 errno = 0;
2088 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2089
2090 if (feof(f))
2091 break;
2092
2093 if (ferror(f) && errno != 0)
2094 return -errno;
2095
2096 return -EBADMSG;
2097 }
2098
2099 if (!enabled) {
2100 free(controller);
2101 continue;
2102 }
2103
2104 if (!cg_controller_is_valid(controller)) {
2105 free(controller);
2106 return -EBADMSG;
2107 }
2108
2109 r = set_consume(controllers, controller);
2110 if (r < 0)
2111 return r;
2112 }
2113
2114 return 0;
2115 }
2116
2117 static thread_local int unified_cache = -1;
2118
2119 int cg_unified(void) {
2120 struct statfs fs;
2121
2122 /* Checks if we support the unified hierarchy. Returns an
2123 * error when the cgroup hierarchies aren't mounted yet or we
2124 * have any other trouble determining if the unified hierarchy
2125 * is supported. */
2126
2127 if (unified_cache >= 0)
2128 return unified_cache;
2129
2130 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2131 return -errno;
2132
2133 if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2134 unified_cache = true;
2135 else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2136 unified_cache = false;
2137 else
2138 return -ENOMEDIUM;
2139
2140 return unified_cache;
2141 }
2142
2143 void cg_unified_flush(void) {
2144 unified_cache = -1;
2145 }
2146
2147 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2148 _cleanup_free_ char *fs = NULL;
2149 CGroupController c;
2150 int r, unified;
2151
2152 assert(p);
2153
2154 if (supported == 0)
2155 return 0;
2156
2157 unified = cg_unified();
2158 if (unified < 0)
2159 return unified;
2160 if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2161 return 0;
2162
2163 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2164 if (r < 0)
2165 return r;
2166
2167 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2168 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2169 const char *n;
2170
2171 if (!(supported & bit))
2172 continue;
2173
2174 n = cgroup_controller_to_string(c);
2175 {
2176 char s[1 + strlen(n) + 1];
2177
2178 s[0] = mask & bit ? '+' : '-';
2179 strcpy(s + 1, n);
2180
2181 r = write_string_file(fs, s, 0);
2182 if (r < 0)
2183 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2184 }
2185 }
2186
2187 return 0;
2188 }
2189
2190 bool cg_is_unified_wanted(void) {
2191 static thread_local int wanted = -1;
2192 int r, unified;
2193
2194 /* If the hierarchy is already mounted, then follow whatever
2195 * was chosen for it. */
2196 unified = cg_unified();
2197 if (unified >= 0)
2198 return unified;
2199
2200 /* Otherwise, let's see what the kernel command line has to
2201 * say. Since checking that is expensive, let's cache the
2202 * result. */
2203 if (wanted >= 0)
2204 return wanted;
2205
2206 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2207 if (r > 0)
2208 return (wanted = true);
2209 else {
2210 _cleanup_free_ char *value = NULL;
2211
2212 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2213 if (r < 0)
2214 return false;
2215 if (r == 0)
2216 return (wanted = false);
2217
2218 return (wanted = parse_boolean(value) > 0);
2219 }
2220 }
2221
/* The exact opposite of cg_is_unified_wanted() */
bool cg_is_legacy_wanted(void) {
        return !cg_is_unified_wanted();
}
2225
2226 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2227 uint64_t u;
2228 int r;
2229
2230 if (isempty(s)) {
2231 *ret = CGROUP_CPU_SHARES_INVALID;
2232 return 0;
2233 }
2234
2235 r = safe_atou64(s, &u);
2236 if (r < 0)
2237 return r;
2238
2239 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2240 return -ERANGE;
2241
2242 *ret = u;
2243 return 0;
2244 }
2245
2246 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2247 uint64_t u;
2248 int r;
2249
2250 if (isempty(s)) {
2251 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2252 return 0;
2253 }
2254
2255 r = safe_atou64(s, &u);
2256 if (r < 0)
2257 return r;
2258
2259 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2260 return -ERANGE;
2261
2262 *ret = u;
2263 return 0;
2264 }
2265
2266 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2267 [CGROUP_CONTROLLER_CPU] = "cpu",
2268 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2269 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2270 [CGROUP_CONTROLLER_MEMORY] = "memory",
2271 [CGROUP_CONTROLLER_DEVICES] = "devices",
2272 [CGROUP_CONTROLLER_PIDS] = "pids",
2273 [CGROUP_CONTROLLER_NET_CLS] = "net_cls",
2274 };
2275
2276 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);