]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/cgroup-util.c
Merge pull request #1691 from poettering/util-lib-3
[thirdparty/systemd.git] / src / basic / cgroup-util.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <dirent.h>
23 #include <errno.h>
24 #include <ftw.h>
25 #include <signal.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/stat.h>
29 #include <sys/types.h>
30 #include <unistd.h>
31
32 #include "alloc-util.h"
33 #include "cgroup-util.h"
34 #include "dirent-util.h"
35 #include "extract-word.h"
36 #include "fd-util.h"
37 #include "fileio.h"
38 #include "formats-util.h"
39 #include "fs-util.h"
40 #include "login-util.h"
41 #include "macro.h"
42 #include "mkdir.h"
43 #include "parse-util.h"
44 #include "path-util.h"
45 #include "proc-cmdline.h"
46 #include "process-util.h"
47 #include "set.h"
48 #include "special.h"
49 #include "stat-util.h"
50 #include "string-table.h"
51 #include "string-util.h"
52 #include "unit-name.h"
53 #include "user-util.h"
54 #include "util.h"
55
56 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
57 _cleanup_free_ char *fs = NULL;
58 FILE *f;
59 int r;
60
61 assert(_f);
62
63 r = cg_get_path(controller, path, "cgroup.procs", &fs);
64 if (r < 0)
65 return r;
66
67 f = fopen(fs, "re");
68 if (!f)
69 return -errno;
70
71 *_f = f;
72 return 0;
73 }
74
/* Reads the next PID from an open "cgroup.procs" stream.
 *
 * Returns 1 and stores the PID in *_pid if one was read, 0 on end of
 * file, and a negative errno-style error otherwise (-EIO if the input
 * could not be parsed or does not denote a valid PID). *_pid is left
 * untouched unless 1 is returned.
 *
 * Note that the cgroup.procs might contain duplicates! See
 * cgroups.txt for details. */
int cg_read_pid(FILE *f, pid_t *_pid) {
        unsigned long ul;
        pid_t pid;

        assert(f);
        assert(_pid);

        errno = 0;
        if (fscanf(f, "%lu", &ul) != 1) {

                if (feof(f))
                        return 0;

                return errno ? -errno : -EIO;
        }

        /* Refuse zero as well as anything that does not survive the
         * conversion to pid_t. Without the round-trip check an
         * oversized value would be truncated, possibly into a negative
         * PID, which kill() interprets as a process group (or, for
         * -1, as "every process we may signal"). */
        pid = (pid_t) ul;
        if (pid <= 0 || (unsigned long) pid != ul)
                return -EIO;

        *_pid = pid;
        return 1;
}
99
100 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
101 _cleanup_free_ char *fs = NULL;
102 int r;
103 DIR *d;
104
105 assert(_d);
106
107 /* This is not recursive! */
108
109 r = cg_get_path(controller, path, NULL, &fs);
110 if (r < 0)
111 return r;
112
113 d = opendir(fs);
114 if (!d)
115 return -errno;
116
117 *_d = d;
118 return 0;
119 }
120
121 int cg_read_subgroup(DIR *d, char **fn) {
122 struct dirent *de;
123
124 assert(d);
125 assert(fn);
126
127 FOREACH_DIRENT_ALL(de, d, return -errno) {
128 char *b;
129
130 if (de->d_type != DT_DIR)
131 continue;
132
133 if (streq(de->d_name, ".") ||
134 streq(de->d_name, ".."))
135 continue;
136
137 b = strdup(de->d_name);
138 if (!b)
139 return -ENOMEM;
140
141 *fn = b;
142 return 1;
143 }
144
145 return 0;
146 }
147
148 int cg_rmdir(const char *controller, const char *path) {
149 _cleanup_free_ char *p = NULL;
150 int r;
151
152 r = cg_get_path(controller, path, NULL, &p);
153 if (r < 0)
154 return r;
155
156 r = rmdir(p);
157 if (r < 0 && errno != ENOENT)
158 return -errno;
159
160 return 0;
161 }
162
163 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
164 _cleanup_set_free_ Set *allocated_set = NULL;
165 bool done = false;
166 int r, ret = 0;
167 pid_t my_pid;
168
169 assert(sig >= 0);
170
171 /* This goes through the tasks list and kills them all. This
172 * is repeated until no further processes are added to the
173 * tasks list, to properly handle forking processes */
174
175 if (!s) {
176 s = allocated_set = set_new(NULL);
177 if (!s)
178 return -ENOMEM;
179 }
180
181 my_pid = getpid();
182
183 do {
184 _cleanup_fclose_ FILE *f = NULL;
185 pid_t pid = 0;
186 done = true;
187
188 r = cg_enumerate_processes(controller, path, &f);
189 if (r < 0) {
190 if (ret >= 0 && r != -ENOENT)
191 return r;
192
193 return ret;
194 }
195
196 while ((r = cg_read_pid(f, &pid)) > 0) {
197
198 if (ignore_self && pid == my_pid)
199 continue;
200
201 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
202 continue;
203
204 /* If we haven't killed this process yet, kill
205 * it */
206 if (kill(pid, sig) < 0) {
207 if (ret >= 0 && errno != ESRCH)
208 ret = -errno;
209 } else {
210 if (sigcont && sig != SIGKILL)
211 (void) kill(pid, SIGCONT);
212
213 if (ret == 0)
214 ret = 1;
215 }
216
217 done = false;
218
219 r = set_put(s, PID_TO_PTR(pid));
220 if (r < 0) {
221 if (ret >= 0)
222 return r;
223
224 return ret;
225 }
226 }
227
228 if (r < 0) {
229 if (ret >= 0)
230 return r;
231
232 return ret;
233 }
234
235 /* To avoid racing against processes which fork
236 * quicker than we can kill them we repeat this until
237 * no new pids need to be killed. */
238
239 } while (!done);
240
241 return ret;
242 }
243
244 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
245 _cleanup_set_free_ Set *allocated_set = NULL;
246 _cleanup_closedir_ DIR *d = NULL;
247 int r, ret;
248 char *fn;
249
250 assert(path);
251 assert(sig >= 0);
252
253 if (!s) {
254 s = allocated_set = set_new(NULL);
255 if (!s)
256 return -ENOMEM;
257 }
258
259 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
260
261 r = cg_enumerate_subgroups(controller, path, &d);
262 if (r < 0) {
263 if (ret >= 0 && r != -ENOENT)
264 return r;
265
266 return ret;
267 }
268
269 while ((r = cg_read_subgroup(d, &fn)) > 0) {
270 _cleanup_free_ char *p = NULL;
271
272 p = strjoin(path, "/", fn, NULL);
273 free(fn);
274 if (!p)
275 return -ENOMEM;
276
277 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
278 if (r != 0 && ret >= 0)
279 ret = r;
280 }
281
282 if (ret >= 0 && r < 0)
283 ret = r;
284
285 if (rem) {
286 r = cg_rmdir(controller, path);
287 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
288 return r;
289 }
290
291 return ret;
292 }
293
294 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
295 bool done = false;
296 _cleanup_set_free_ Set *s = NULL;
297 int r, ret = 0;
298 pid_t my_pid;
299
300 assert(cfrom);
301 assert(pfrom);
302 assert(cto);
303 assert(pto);
304
305 s = set_new(NULL);
306 if (!s)
307 return -ENOMEM;
308
309 my_pid = getpid();
310
311 do {
312 _cleanup_fclose_ FILE *f = NULL;
313 pid_t pid = 0;
314 done = true;
315
316 r = cg_enumerate_processes(cfrom, pfrom, &f);
317 if (r < 0) {
318 if (ret >= 0 && r != -ENOENT)
319 return r;
320
321 return ret;
322 }
323
324 while ((r = cg_read_pid(f, &pid)) > 0) {
325
326 /* This might do weird stuff if we aren't a
327 * single-threaded program. However, we
328 * luckily know we are not */
329 if (ignore_self && pid == my_pid)
330 continue;
331
332 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
333 continue;
334
335 /* Ignore kernel threads. Since they can only
336 * exist in the root cgroup, we only check for
337 * them there. */
338 if (cfrom &&
339 (isempty(pfrom) || path_equal(pfrom, "/")) &&
340 is_kernel_thread(pid) > 0)
341 continue;
342
343 r = cg_attach(cto, pto, pid);
344 if (r < 0) {
345 if (ret >= 0 && r != -ESRCH)
346 ret = r;
347 } else if (ret == 0)
348 ret = 1;
349
350 done = false;
351
352 r = set_put(s, PID_TO_PTR(pid));
353 if (r < 0) {
354 if (ret >= 0)
355 return r;
356
357 return ret;
358 }
359 }
360
361 if (r < 0) {
362 if (ret >= 0)
363 return r;
364
365 return ret;
366 }
367 } while (!done);
368
369 return ret;
370 }
371
372 int cg_migrate_recursive(
373 const char *cfrom,
374 const char *pfrom,
375 const char *cto,
376 const char *pto,
377 bool ignore_self,
378 bool rem) {
379
380 _cleanup_closedir_ DIR *d = NULL;
381 int r, ret = 0;
382 char *fn;
383
384 assert(cfrom);
385 assert(pfrom);
386 assert(cto);
387 assert(pto);
388
389 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
390
391 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
392 if (r < 0) {
393 if (ret >= 0 && r != -ENOENT)
394 return r;
395
396 return ret;
397 }
398
399 while ((r = cg_read_subgroup(d, &fn)) > 0) {
400 _cleanup_free_ char *p = NULL;
401
402 p = strjoin(pfrom, "/", fn, NULL);
403 free(fn);
404 if (!p)
405 return -ENOMEM;
406
407 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
408 if (r != 0 && ret >= 0)
409 ret = r;
410 }
411
412 if (r < 0 && ret >= 0)
413 ret = r;
414
415 if (rem) {
416 r = cg_rmdir(cfrom, pfrom);
417 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
418 return r;
419 }
420
421 return ret;
422 }
423
424 int cg_migrate_recursive_fallback(
425 const char *cfrom,
426 const char *pfrom,
427 const char *cto,
428 const char *pto,
429 bool ignore_self,
430 bool rem) {
431
432 int r;
433
434 assert(cfrom);
435 assert(pfrom);
436 assert(cto);
437 assert(pto);
438
439 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
440 if (r < 0) {
441 char prefix[strlen(pto) + 1];
442
443 /* This didn't work? Then let's try all prefixes of the destination */
444
445 PATH_FOREACH_PREFIX(prefix, pto) {
446 int q;
447
448 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
449 if (q >= 0)
450 return q;
451 }
452 }
453
454 return r;
455 }
456
/* Maps a controller name to the name of the directory below
 * /sys/fs/cgroup/ we want to mount it to. Effectively this only
 * strips the "name=" prefix used for named hierarchies, if there is
 * one. The returned pointer points into the string passed in. */
static const char *controller_to_dirname(const char *controller) {
        const char *stripped;

        assert(controller);

        stripped = startswith(controller, "name=");

        return stripped ? stripped : controller;
}
473
474 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
475 const char *dn;
476 char *t = NULL;
477
478 assert(fs);
479 assert(controller);
480
481 dn = controller_to_dirname(controller);
482
483 if (isempty(path) && isempty(suffix))
484 t = strappend("/sys/fs/cgroup/", dn);
485 else if (isempty(path))
486 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
487 else if (isempty(suffix))
488 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
489 else
490 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
491 if (!t)
492 return -ENOMEM;
493
494 *fs = t;
495 return 0;
496 }
497
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]" for the unified
 * hierarchy. Empty or NULL path/suffix components are left out.
 *
 * Returns 0 and stores the newly allocated string in *fs, or -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        bool has_path, has_suffix;
        char *joined;

        assert(fs);

        has_path = !isempty(path);
        has_suffix = !isempty(suffix);

        if (has_path && has_suffix)
                joined = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
        else if (has_path)
                joined = strappend("/sys/fs/cgroup/", path);
        else if (has_suffix)
                joined = strappend("/sys/fs/cgroup/", suffix);
        else
                joined = strdup("/sys/fs/cgroup");

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
517
/* Computes the file system path for a cgroup (and optionally one of
 * its attributes, given as suffix).
 *
 * With a controller, the result lives below /sys/fs/cgroup, built
 * according to whether the unified or the legacy hierarchy is in use.
 * Without a controller, path and suffix are simply joined, without
 * any prefix; at least one of them must then be non-NULL.
 *
 * Returns 0 and stores a newly allocated string in *fs, or a negative
 * errno-style error (-EINVAL for an invalid controller name or when
 * nothing at all was specified). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int unified, r;

        assert(fs);

        if (!controller) {
                char *joined;

                /* If no controller is specified, we return the path
                 * *below* the controllers, without any prefix. */

                if (path && suffix)
                        joined = strjoin(path, "/", suffix, NULL);
                else if (path)
                        joined = strdup(path);
                else if (suffix)
                        joined = strdup(suffix);
                else
                        return -EINVAL;

                if (!joined)
                        return -ENOMEM;

                *fs = path_kill_slashes(joined);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        unified = cg_unified();
        if (unified < 0)
                return unified;

        r = unified > 0 ?
                join_path_unified(path, suffix, fs) :
                join_path_legacy(controller, path, suffix, fs);
        if (r < 0)
                return r;

        path_kill_slashes(*fs);
        return 0;
}
562
563 static int controller_is_accessible(const char *controller) {
564 int unified;
565
566 assert(controller);
567
568 /* Checks whether a specific controller is accessible,
569 * i.e. its hierarchy mounted. In the unified hierarchy all
570 * controllers are considered accessible, except for the named
571 * hierarchies */
572
573 if (!cg_controller_is_valid(controller))
574 return -EINVAL;
575
576 unified = cg_unified();
577 if (unified < 0)
578 return unified;
579 if (unified > 0) {
580 /* We don't support named hierarchies if we are using
581 * the unified hierarchy. */
582
583 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
584 return 0;
585
586 if (startswith(controller, "name="))
587 return -EOPNOTSUPP;
588
589 } else {
590 const char *cc, *dn;
591
592 dn = controller_to_dirname(controller);
593 cc = strjoina("/sys/fs/cgroup/", dn);
594
595 if (laccess(cc, F_OK) < 0)
596 return -errno;
597 }
598
599 return 0;
600 }
601
/* Same as cg_get_path(), but first verifies that the controller is
 * actually accessible and fails with that check's error if not. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
615
616 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
617 assert(path);
618 assert(sb);
619 assert(ftwbuf);
620
621 if (typeflag != FTW_DP)
622 return 0;
623
624 if (ftwbuf->level < 1)
625 return 0;
626
627 (void) rmdir(path);
628 return 0;
629 }
630
631 int cg_trim(const char *controller, const char *path, bool delete_root) {
632 _cleanup_free_ char *fs = NULL;
633 int r = 0;
634
635 assert(path);
636
637 r = cg_get_path(controller, path, NULL, &fs);
638 if (r < 0)
639 return r;
640
641 errno = 0;
642 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
643 if (errno == ENOENT)
644 r = 0;
645 else if (errno != 0)
646 r = -errno;
647 else
648 r = -EIO;
649 }
650
651 if (delete_root) {
652 if (rmdir(fs) < 0 && errno != ENOENT)
653 return -errno;
654 }
655
656 return r;
657 }
658
659 int cg_create(const char *controller, const char *path) {
660 _cleanup_free_ char *fs = NULL;
661 int r;
662
663 r = cg_get_path_and_check(controller, path, NULL, &fs);
664 if (r < 0)
665 return r;
666
667 r = mkdir_parents(fs, 0755);
668 if (r < 0)
669 return r;
670
671 if (mkdir(fs, 0755) < 0) {
672
673 if (errno == EEXIST)
674 return 0;
675
676 return -errno;
677 }
678
679 return 1;
680 }
681
/* Creates the cgroup controller/path and attaches pid to it.
 *
 * Returns the result of cg_create() (1 if the cgroup was newly
 * created, 0 if it existed already) on success, or a negative
 * errno-style error. The cgroup is not removed again if attaching
 * fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);
        if (attached < 0)
                return attached;

        /* This does not remove the cgroup on failure */
        return created;
}
698
699 int cg_attach(const char *controller, const char *path, pid_t pid) {
700 _cleanup_free_ char *fs = NULL;
701 char c[DECIMAL_STR_MAX(pid_t) + 2];
702 int r;
703
704 assert(path);
705 assert(pid >= 0);
706
707 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
708 if (r < 0)
709 return r;
710
711 if (pid == 0)
712 pid = getpid();
713
714 snprintf(c, sizeof(c), PID_FMT"\n", pid);
715
716 return write_string_file(fs, c, 0);
717 }
718
719 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
720 int r;
721
722 assert(controller);
723 assert(path);
724 assert(pid >= 0);
725
726 r = cg_attach(controller, path, pid);
727 if (r < 0) {
728 char prefix[strlen(path) + 1];
729
730 /* This didn't work? Then let's try all prefixes of
731 * the destination */
732
733 PATH_FOREACH_PREFIX(prefix, path) {
734 int q;
735
736 q = cg_attach(controller, prefix, pid);
737 if (q >= 0)
738 return q;
739 }
740 }
741
742 return r;
743 }
744
745 int cg_set_group_access(
746 const char *controller,
747 const char *path,
748 mode_t mode,
749 uid_t uid,
750 gid_t gid) {
751
752 _cleanup_free_ char *fs = NULL;
753 int r;
754
755 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
756 return 0;
757
758 if (mode != MODE_INVALID)
759 mode &= 0777;
760
761 r = cg_get_path(controller, path, NULL, &fs);
762 if (r < 0)
763 return r;
764
765 return chmod_and_chown(fs, mode, uid, gid);
766 }
767
768 int cg_set_task_access(
769 const char *controller,
770 const char *path,
771 mode_t mode,
772 uid_t uid,
773 gid_t gid) {
774
775 _cleanup_free_ char *fs = NULL, *procs = NULL;
776 int r, unified;
777
778 assert(path);
779
780 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
781 return 0;
782
783 if (mode != MODE_INVALID)
784 mode &= 0666;
785
786 r = cg_get_path(controller, path, "cgroup.procs", &fs);
787 if (r < 0)
788 return r;
789
790 r = chmod_and_chown(fs, mode, uid, gid);
791 if (r < 0)
792 return r;
793
794 unified = cg_unified();
795 if (unified < 0)
796 return unified;
797 if (unified)
798 return 0;
799
800 /* Compatibility, Always keep values for "tasks" in sync with
801 * "cgroup.procs" */
802 if (cg_get_path(controller, path, "tasks", &procs) >= 0)
803 (void) chmod_and_chown(procs, mode, uid, gid);
804
805 return 0;
806 }
807
808 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
809 _cleanup_fclose_ FILE *f = NULL;
810 char line[LINE_MAX];
811 const char *fs;
812 size_t cs = 0;
813 int unified;
814
815 assert(path);
816 assert(pid >= 0);
817
818 unified = cg_unified();
819 if (unified < 0)
820 return unified;
821 if (unified == 0) {
822 if (controller) {
823 if (!cg_controller_is_valid(controller))
824 return -EINVAL;
825 } else
826 controller = SYSTEMD_CGROUP_CONTROLLER;
827
828 cs = strlen(controller);
829 }
830
831 fs = procfs_file_alloca(pid, "cgroup");
832 f = fopen(fs, "re");
833 if (!f)
834 return errno == ENOENT ? -ESRCH : -errno;
835
836 FOREACH_LINE(line, f, return -errno) {
837 char *e, *p;
838
839 truncate_nl(line);
840
841 if (unified) {
842 e = startswith(line, "0:");
843 if (!e)
844 continue;
845
846 e = strchr(e, ':');
847 if (!e)
848 continue;
849 } else {
850 char *l;
851 size_t k;
852 const char *word, *state;
853 bool found = false;
854
855 l = strchr(line, ':');
856 if (!l)
857 continue;
858
859 l++;
860 e = strchr(l, ':');
861 if (!e)
862 continue;
863
864 *e = 0;
865 FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
866 if (k == cs && memcmp(word, controller, cs) == 0) {
867 found = true;
868 break;
869 }
870 }
871
872 if (!found)
873 continue;
874 }
875
876 p = strdup(e + 1);
877 if (!p)
878 return -ENOMEM;
879
880 *path = p;
881 return 0;
882 }
883
884 return -ENODATA;
885 }
886
887 int cg_install_release_agent(const char *controller, const char *agent) {
888 _cleanup_free_ char *fs = NULL, *contents = NULL;
889 const char *sc;
890 int r, unified;
891
892 assert(agent);
893
894 unified = cg_unified();
895 if (unified < 0)
896 return unified;
897 if (unified) /* doesn't apply to unified hierarchy */
898 return -EOPNOTSUPP;
899
900 r = cg_get_path(controller, NULL, "release_agent", &fs);
901 if (r < 0)
902 return r;
903
904 r = read_one_line_file(fs, &contents);
905 if (r < 0)
906 return r;
907
908 sc = strstrip(contents);
909 if (isempty(sc)) {
910 r = write_string_file(fs, agent, 0);
911 if (r < 0)
912 return r;
913 } else if (!path_equal(sc, agent))
914 return -EEXIST;
915
916 fs = mfree(fs);
917 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
918 if (r < 0)
919 return r;
920
921 contents = mfree(contents);
922 r = read_one_line_file(fs, &contents);
923 if (r < 0)
924 return r;
925
926 sc = strstrip(contents);
927 if (streq(sc, "0")) {
928 r = write_string_file(fs, "1", 0);
929 if (r < 0)
930 return r;
931
932 return 1;
933 }
934
935 if (!streq(sc, "1"))
936 return -EIO;
937
938 return 0;
939 }
940
941 int cg_uninstall_release_agent(const char *controller) {
942 _cleanup_free_ char *fs = NULL;
943 int r, unified;
944
945 unified = cg_unified();
946 if (unified < 0)
947 return unified;
948 if (unified) /* Doesn't apply to unified hierarchy */
949 return -EOPNOTSUPP;
950
951 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
952 if (r < 0)
953 return r;
954
955 r = write_string_file(fs, "0", 0);
956 if (r < 0)
957 return r;
958
959 fs = mfree(fs);
960
961 r = cg_get_path(controller, NULL, "release_agent", &fs);
962 if (r < 0)
963 return r;
964
965 r = write_string_file(fs, "", 0);
966 if (r < 0)
967 return r;
968
969 return 0;
970 }
971
972 int cg_is_empty(const char *controller, const char *path) {
973 _cleanup_fclose_ FILE *f = NULL;
974 pid_t pid;
975 int r;
976
977 assert(path);
978
979 r = cg_enumerate_processes(controller, path, &f);
980 if (r == -ENOENT)
981 return 1;
982 if (r < 0)
983 return r;
984
985 r = cg_read_pid(f, &pid);
986 if (r < 0)
987 return r;
988
989 return r == 0;
990 }
991
992 int cg_is_empty_recursive(const char *controller, const char *path) {
993 int unified, r;
994
995 assert(path);
996
997 /* The root cgroup is always populated */
998 if (controller && (isempty(path) || path_equal(path, "/")))
999 return false;
1000
1001 unified = cg_unified();
1002 if (unified < 0)
1003 return unified;
1004
1005 if (unified > 0) {
1006 _cleanup_free_ char *populated = NULL, *t = NULL;
1007
1008 /* On the unified hierarchy we can check empty state
1009 * via the "cgroup.populated" attribute. */
1010
1011 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1012 if (r < 0)
1013 return r;
1014
1015 r = read_one_line_file(populated, &t);
1016 if (r == -ENOENT)
1017 return 1;
1018 if (r < 0)
1019 return r;
1020
1021 return streq(t, "0");
1022 } else {
1023 _cleanup_closedir_ DIR *d = NULL;
1024 char *fn;
1025
1026 r = cg_is_empty(controller, path);
1027 if (r <= 0)
1028 return r;
1029
1030 r = cg_enumerate_subgroups(controller, path, &d);
1031 if (r == -ENOENT)
1032 return 1;
1033 if (r < 0)
1034 return r;
1035
1036 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1037 _cleanup_free_ char *p = NULL;
1038
1039 p = strjoin(path, "/", fn, NULL);
1040 free(fn);
1041 if (!p)
1042 return -ENOMEM;
1043
1044 r = cg_is_empty_recursive(controller, p);
1045 if (r <= 0)
1046 return r;
1047 }
1048 if (r < 0)
1049 return r;
1050
1051 return true;
1052 }
1053 }
1054
1055 int cg_split_spec(const char *spec, char **controller, char **path) {
1056 char *t = NULL, *u = NULL;
1057 const char *e;
1058
1059 assert(spec);
1060
1061 if (*spec == '/') {
1062 if (!path_is_safe(spec))
1063 return -EINVAL;
1064
1065 if (path) {
1066 t = strdup(spec);
1067 if (!t)
1068 return -ENOMEM;
1069
1070 *path = path_kill_slashes(t);
1071 }
1072
1073 if (controller)
1074 *controller = NULL;
1075
1076 return 0;
1077 }
1078
1079 e = strchr(spec, ':');
1080 if (!e) {
1081 if (!cg_controller_is_valid(spec))
1082 return -EINVAL;
1083
1084 if (controller) {
1085 t = strdup(spec);
1086 if (!t)
1087 return -ENOMEM;
1088
1089 *controller = t;
1090 }
1091
1092 if (path)
1093 *path = NULL;
1094
1095 return 0;
1096 }
1097
1098 t = strndup(spec, e-spec);
1099 if (!t)
1100 return -ENOMEM;
1101 if (!cg_controller_is_valid(t)) {
1102 free(t);
1103 return -EINVAL;
1104 }
1105
1106 if (isempty(e+1))
1107 u = NULL;
1108 else {
1109 u = strdup(e+1);
1110 if (!u) {
1111 free(t);
1112 return -ENOMEM;
1113 }
1114
1115 if (!path_is_safe(u) ||
1116 !path_is_absolute(u)) {
1117 free(t);
1118 free(u);
1119 return -EINVAL;
1120 }
1121
1122 path_kill_slashes(u);
1123 }
1124
1125 if (controller)
1126 *controller = t;
1127 else
1128 free(t);
1129
1130 if (path)
1131 *path = u;
1132 else
1133 free(u);
1134
1135 return 0;
1136 }
1137
1138 int cg_mangle_path(const char *path, char **result) {
1139 _cleanup_free_ char *c = NULL, *p = NULL;
1140 char *t;
1141 int r;
1142
1143 assert(path);
1144 assert(result);
1145
1146 /* First, check if it already is a filesystem path */
1147 if (path_startswith(path, "/sys/fs/cgroup")) {
1148
1149 t = strdup(path);
1150 if (!t)
1151 return -ENOMEM;
1152
1153 *result = path_kill_slashes(t);
1154 return 0;
1155 }
1156
1157 /* Otherwise, treat it as cg spec */
1158 r = cg_split_spec(path, &c, &p);
1159 if (r < 0)
1160 return r;
1161
1162 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1163 }
1164
1165 int cg_get_root_path(char **path) {
1166 char *p, *e;
1167 int r;
1168
1169 assert(path);
1170
1171 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1172 if (r < 0)
1173 return r;
1174
1175 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1176 if (!e)
1177 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1178 if (!e)
1179 e = endswith(p, "/system"); /* even more legacy */
1180 if (e)
1181 *e = 0;
1182
1183 *path = p;
1184 return 0;
1185 }
1186
1187 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1188 _cleanup_free_ char *rt = NULL;
1189 char *p;
1190 int r;
1191
1192 assert(cgroup);
1193 assert(shifted);
1194
1195 if (!root) {
1196 /* If the root was specified let's use that, otherwise
1197 * let's determine it from PID 1 */
1198
1199 r = cg_get_root_path(&rt);
1200 if (r < 0)
1201 return r;
1202
1203 root = rt;
1204 }
1205
1206 p = path_startswith(cgroup, root);
1207 if (p && p > cgroup)
1208 *shifted = p - 1;
1209 else
1210 *shifted = cgroup;
1211
1212 return 0;
1213 }
1214
1215 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1216 _cleanup_free_ char *raw = NULL;
1217 const char *c;
1218 int r;
1219
1220 assert(pid >= 0);
1221 assert(cgroup);
1222
1223 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1224 if (r < 0)
1225 return r;
1226
1227 r = cg_shift_path(raw, root, &c);
1228 if (r < 0)
1229 return r;
1230
1231 if (c == raw) {
1232 *cgroup = raw;
1233 raw = NULL;
1234 } else {
1235 char *n;
1236
1237 n = strdup(c);
1238 if (!n)
1239 return -ENOMEM;
1240
1241 *cgroup = n;
1242 }
1243
1244 return 0;
1245 }
1246
1247 int cg_path_decode_unit(const char *cgroup, char **unit){
1248 char *c, *s;
1249 size_t n;
1250
1251 assert(cgroup);
1252 assert(unit);
1253
1254 n = strcspn(cgroup, "/");
1255 if (n < 3)
1256 return -ENXIO;
1257
1258 c = strndupa(cgroup, n);
1259 c = cg_unescape(c);
1260
1261 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1262 return -ENXIO;
1263
1264 s = strdup(c);
1265 if (!s)
1266 return -ENOMEM;
1267
1268 *unit = s;
1269 return 0;
1270 }
1271
1272 static bool valid_slice_name(const char *p, size_t n) {
1273
1274 if (!p)
1275 return false;
1276
1277 if (n < strlen("x.slice"))
1278 return false;
1279
1280 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1281 char buf[n+1], *c;
1282
1283 memcpy(buf, p, n);
1284 buf[n] = 0;
1285
1286 c = cg_unescape(buf);
1287
1288 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1289 }
1290
1291 return false;
1292 }
1293
/* Skips over all leading slice components of a cgroup path and
 * returns a pointer to the first component that is not a valid slice
 * name (leading slashes already skipped). The result points into p. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t len;

                component = p + strspn(p, "/");
                len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                p = component + len;
        }
}
1311
/* Determines the unit a cgroup path belongs to: all leading slices
 * are skipped and the component following them is decoded as a unit
 * name.
 *
 * Returns 0 and stores a newly allocated string in *ret, -ENXIO if
 * there is no valid non-slice unit at that position, or another
 * negative errno-style error. */
int cg_path_get_unit(const char *path, char **ret) {
        const char *after_slices;
        char *name;
        int r;

        assert(path);
        assert(ret);

        after_slices = skip_slices(path);

        r = cg_path_decode_unit(after_slices, &name);
        if (r < 0)
                return r;

        /* We skipped over the slices, don't accept any now */
        if (!endswith(name, ".slice")) {
                *ret = name;
                return 0;
        }

        free(name);
        return -ENXIO;
}
1335
1336 int cg_pid_get_unit(pid_t pid, char **unit) {
1337 _cleanup_free_ char *cgroup = NULL;
1338 int r;
1339
1340 assert(unit);
1341
1342 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1343 if (r < 0)
1344 return r;
1345
1346 return cg_path_get_unit(cgroup, unit);
1347 }
1348
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to whatever follows the session scope component
 * (leading slashes skipped), or NULL if p is empty or does not start
 * with a session scope carrying a valid session ID. The result points
 * into p.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        /* The length check guarantees that the two fixed-size
         * comparisons below stay within the component. */
        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                char buf[n - 8 - 6 + 1];

                /* Extract the session ID between prefix and suffix */
                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1385
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to whatever follows the user manager component
 * (leading slashes skipped), or NULL if p is empty or does not start
 * with a user@<uid>.service component. The result points into p.
 */
static const char *skip_user_manager(const char *p) {
        size_t len, uid_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        /* The length check guarantees that the two fixed-size
         * comparisons below stay within the component. */
        len = strcspn(p, "/");
        if (len < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;

        uid_len = len - 5 - 8;

        {
                char uid_text[uid_len + 1];

                /* Extract the UID between prefix and suffix */
                memcpy(uid_text, p + 5, uid_len);
                uid_text[uid_len] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(uid_text, NULL) < 0)
                        return NULL;
        }

        p += len;
        p += strspn(p, "/");

        return p;
}
1423
/* Skips everything in a cgroup path that precedes a user unit: any
 * leading slices, followed by either the user manager service or a
 * session scope. Returns a pointer into path past that prefix, or
 * NULL if neither of the two follows the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Skip the user manager, if it's in the path now... */
        after_manager = skip_user_manager(after_slices);

        /* Alternatively skip the user session if it is in the path... */
        return after_manager ? after_manager : skip_session(after_slices);
}
1440
/* Determines the user unit a cgroup path belongs to.
 *
 * Returns 0 and stores a newly allocated string in *ret, -ENXIO if
 * the path carries no user manager or session prefix, or whatever
 * cg_path_get_unit() returns for the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* And from here on it looks pretty much the same as for a
         * system unit, hence let's use the same parser from here
         * on. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1456
1457 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1458 _cleanup_free_ char *cgroup = NULL;
1459 int r;
1460
1461 assert(unit);
1462
1463 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1464 if (r < 0)
1465 return r;
1466
1467 return cg_path_get_user_unit(cgroup, unit);
1468 }
1469
1470 int cg_path_get_machine_name(const char *path, char **machine) {
1471 _cleanup_free_ char *u = NULL;
1472 const char *sl;
1473 int r;
1474
1475 r = cg_path_get_unit(path, &u);
1476 if (r < 0)
1477 return r;
1478
1479 sl = strjoina("/run/systemd/machines/unit:", u);
1480 return readlink_malloc(sl, machine);
1481 }
1482
1483 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1484 _cleanup_free_ char *cgroup = NULL;
1485 int r;
1486
1487 assert(machine);
1488
1489 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1490 if (r < 0)
1491 return r;
1492
1493 return cg_path_get_machine_name(cgroup, machine);
1494 }
1495
1496 int cg_path_get_session(const char *path, char **session) {
1497 _cleanup_free_ char *unit = NULL;
1498 char *start, *end;
1499 int r;
1500
1501 assert(path);
1502
1503 r = cg_path_get_unit(path, &unit);
1504 if (r < 0)
1505 return r;
1506
1507 start = startswith(unit, "session-");
1508 if (!start)
1509 return -ENXIO;
1510 end = endswith(start, ".scope");
1511 if (!end)
1512 return -ENXIO;
1513
1514 *end = 0;
1515 if (!session_id_valid(start))
1516 return -ENXIO;
1517
1518 if (session) {
1519 char *rr;
1520
1521 rr = strdup(start);
1522 if (!rr)
1523 return -ENOMEM;
1524
1525 *session = rr;
1526 }
1527
1528 return 0;
1529 }
1530
1531 int cg_pid_get_session(pid_t pid, char **session) {
1532 _cleanup_free_ char *cgroup = NULL;
1533 int r;
1534
1535 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1536 if (r < 0)
1537 return r;
1538
1539 return cg_path_get_session(cgroup, session);
1540 }
1541
1542 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1543 _cleanup_free_ char *slice = NULL;
1544 char *start, *end;
1545 int r;
1546
1547 assert(path);
1548
1549 r = cg_path_get_slice(path, &slice);
1550 if (r < 0)
1551 return r;
1552
1553 start = startswith(slice, "user-");
1554 if (!start)
1555 return -ENXIO;
1556 end = endswith(start, ".slice");
1557 if (!end)
1558 return -ENXIO;
1559
1560 *end = 0;
1561 if (parse_uid(start, uid) < 0)
1562 return -ENXIO;
1563
1564 return 0;
1565 }
1566
1567 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1568 _cleanup_free_ char *cgroup = NULL;
1569 int r;
1570
1571 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1572 if (r < 0)
1573 return r;
1574
1575 return cg_path_get_owner_uid(cgroup, uid);
1576 }
1577
/* Walks the components of cgroup path 'p' from the left for as long as
 * they pass valid_slice_name(), and returns the last one that did,
 * decoded with cg_path_decode_unit(). If not even the first component
 * is a slice name, "-.slice" is returned instead.
 *
 * On success *slice is a newly allocated string. Returns 0 or the
 * result of cg_path_decode_unit(), or -ENOMEM. */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last_slice = NULL;
        size_t len;
        char *root;

        assert(p);
        assert(slice);

        for (;;) {
                /* Skip any number of separators, then measure the
                 * component that follows. */
                p += strspn(p, "/");
                len = strcspn(p, "/");

                if (!valid_slice_name(p, len))
                        break;

                last_slice = p;
                p += len;
        }

        if (last_slice)
                return cg_path_decode_unit(last_slice, slice);

        /* No slice component at all */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1613
1614 int cg_pid_get_slice(pid_t pid, char **slice) {
1615 _cleanup_free_ char *cgroup = NULL;
1616 int r;
1617
1618 assert(slice);
1619
1620 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1621 if (r < 0)
1622 return r;
1623
1624 return cg_path_get_slice(cgroup, slice);
1625 }
1626
/* Determines the slice of a cgroup path that carries a user prefix.
 * The part recognised by skip_user_prefix() is dropped and the
 * remainder is handed to cg_path_get_slice().
 *
 * Returns -ENXIO if skip_user_prefix() finds nothing to skip, otherwise
 * whatever cg_path_get_slice() returns. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);
        if (rest)
                /* From here on the layout matches that of a system
                 * slice, hence the same parser is used. */
                return cg_path_get_slice(rest, slice);

        return -ENXIO;
}
1640
1641 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1642 _cleanup_free_ char *cgroup = NULL;
1643 int r;
1644
1645 assert(slice);
1646
1647 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1648 if (r < 0)
1649 return r;
1650
1651 return cg_path_get_user_slice(cgroup, slice);
1652 }
1653
1654 char *cg_escape(const char *p) {
1655 bool need_prefix = false;
1656
1657 /* This implements very minimal escaping for names to be used
1658 * as file names in the cgroup tree: any name which might
1659 * conflict with a kernel name or is prefixed with '_' is
1660 * prefixed with a '_'. That way, when reading cgroup names it
1661 * is sufficient to remove a single prefixing underscore if
1662 * there is one. */
1663
1664 /* The return value of this function (unlike cg_unescape())
1665 * needs free()! */
1666
1667 if (p[0] == 0 ||
1668 p[0] == '_' ||
1669 p[0] == '.' ||
1670 streq(p, "notify_on_release") ||
1671 streq(p, "release_agent") ||
1672 streq(p, "tasks") ||
1673 startswith(p, "cgroup."))
1674 need_prefix = true;
1675 else {
1676 const char *dot;
1677
1678 dot = strrchr(p, '.');
1679 if (dot) {
1680 CGroupController c;
1681 size_t l = dot - p;
1682
1683 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1684 const char *n;
1685
1686 n = cgroup_controller_to_string(c);
1687
1688 if (l != strlen(n))
1689 continue;
1690
1691 if (memcmp(p, n, l) != 0)
1692 continue;
1693
1694 need_prefix = true;
1695 break;
1696 }
1697 }
1698 }
1699
1700 if (need_prefix)
1701 return strappend("_", p);
1702
1703 return strdup(p);
1704 }
1705
/* Reverses cg_escape(): skips one leading underscore, if there is one.
 *
 * Unlike cg_escape(), nothing is allocated here. The result points into
 * the string passed in and must not be freed. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (*p == '_' ? p + 1 : p);
}
1717
1718 #define CONTROLLER_VALID \
1719 DIGITS LETTERS \
1720 "_"
1721
1722 bool cg_controller_is_valid(const char *p) {
1723 const char *t, *s;
1724
1725 if (!p)
1726 return false;
1727
1728 s = startswith(p, "name=");
1729 if (s)
1730 p = s;
1731
1732 if (*p == 0 || *p == '_')
1733 return false;
1734
1735 for (t = p; *t; t++)
1736 if (!strchr(CONTROLLER_VALID, *t))
1737 return false;
1738
1739 if (t - p > FILENAME_MAX)
1740 return false;
1741
1742 return true;
1743 }
1744
1745 int cg_slice_to_path(const char *unit, char **ret) {
1746 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1747 const char *dash;
1748 int r;
1749
1750 assert(unit);
1751 assert(ret);
1752
1753 if (streq(unit, "-.slice")) {
1754 char *x;
1755
1756 x = strdup("");
1757 if (!x)
1758 return -ENOMEM;
1759 *ret = x;
1760 return 0;
1761 }
1762
1763 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1764 return -EINVAL;
1765
1766 if (!endswith(unit, ".slice"))
1767 return -EINVAL;
1768
1769 r = unit_name_to_prefix(unit, &p);
1770 if (r < 0)
1771 return r;
1772
1773 dash = strchr(p, '-');
1774
1775 /* Don't allow initial dashes */
1776 if (dash == p)
1777 return -EINVAL;
1778
1779 while (dash) {
1780 _cleanup_free_ char *escaped = NULL;
1781 char n[dash - p + sizeof(".slice")];
1782
1783 /* Don't allow trailing or double dashes */
1784 if (dash[1] == 0 || dash[1] == '-')
1785 return -EINVAL;
1786
1787 strcpy(stpncpy(n, p, dash - p), ".slice");
1788 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1789 return -EINVAL;
1790
1791 escaped = cg_escape(n);
1792 if (!escaped)
1793 return -ENOMEM;
1794
1795 if (!strextend(&s, escaped, "/", NULL))
1796 return -ENOMEM;
1797
1798 dash = strchr(dash+1, '-');
1799 }
1800
1801 e = cg_escape(unit);
1802 if (!e)
1803 return -ENOMEM;
1804
1805 if (!strextend(&s, e, NULL))
1806 return -ENOMEM;
1807
1808 *ret = s;
1809 s = NULL;
1810
1811 return 0;
1812 }
1813
1814 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1815 _cleanup_free_ char *p = NULL;
1816 int r;
1817
1818 r = cg_get_path(controller, path, attribute, &p);
1819 if (r < 0)
1820 return r;
1821
1822 return write_string_file(p, value, 0);
1823 }
1824
1825 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1826 _cleanup_free_ char *p = NULL;
1827 int r;
1828
1829 r = cg_get_path(controller, path, attribute, &p);
1830 if (r < 0)
1831 return r;
1832
1833 return read_one_line_file(p, ret);
1834 }
1835
1836 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1837 CGroupController c;
1838 int r, unified;
1839
1840 /* This one will create a cgroup in our private tree, but also
1841 * duplicate it in the trees specified in mask, and remove it
1842 * in all others */
1843
1844 /* First create the cgroup in our own hierarchy. */
1845 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1846 if (r < 0)
1847 return r;
1848
1849 /* If we are in the unified hierarchy, we are done now */
1850 unified = cg_unified();
1851 if (unified < 0)
1852 return unified;
1853 if (unified > 0)
1854 return 0;
1855
1856 /* Otherwise, do the same in the other hierarchies */
1857 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1858 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1859 const char *n;
1860
1861 n = cgroup_controller_to_string(c);
1862
1863 if (mask & bit)
1864 (void) cg_create(n, path);
1865 else if (supported & bit)
1866 (void) cg_trim(n, path, true);
1867 }
1868
1869 return 0;
1870 }
1871
1872 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1873 CGroupController c;
1874 int r, unified;
1875
1876 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1877 if (r < 0)
1878 return r;
1879
1880 unified = cg_unified();
1881 if (unified < 0)
1882 return unified;
1883 if (unified > 0)
1884 return 0;
1885
1886 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1887 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1888 const char *p = NULL;
1889
1890 if (!(supported & bit))
1891 continue;
1892
1893 if (path_callback)
1894 p = path_callback(bit, userdata);
1895
1896 if (!p)
1897 p = path;
1898
1899 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1900 }
1901
1902 return 0;
1903 }
1904
1905 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1906 Iterator i;
1907 void *pidp;
1908 int r = 0;
1909
1910 SET_FOREACH(pidp, pids, i) {
1911 pid_t pid = PTR_TO_PID(pidp);
1912 int q;
1913
1914 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1915 if (q < 0 && r >= 0)
1916 r = q;
1917 }
1918
1919 return r;
1920 }
1921
1922 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1923 CGroupController c;
1924 int r = 0, unified;
1925
1926 if (!path_equal(from, to)) {
1927 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1928 if (r < 0)
1929 return r;
1930 }
1931
1932 unified = cg_unified();
1933 if (unified < 0)
1934 return unified;
1935 if (unified > 0)
1936 return r;
1937
1938 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1939 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1940 const char *p = NULL;
1941
1942 if (!(supported & bit))
1943 continue;
1944
1945 if (to_callback)
1946 p = to_callback(bit, userdata);
1947
1948 if (!p)
1949 p = to;
1950
1951 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1952 }
1953
1954 return 0;
1955 }
1956
1957 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1958 CGroupController c;
1959 int r, unified;
1960
1961 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1962 if (r < 0)
1963 return r;
1964
1965 unified = cg_unified();
1966 if (unified < 0)
1967 return unified;
1968 if (unified > 0)
1969 return r;
1970
1971 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1972 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1973
1974 if (!(supported & bit))
1975 continue;
1976
1977 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
1978 }
1979
1980 return 0;
1981 }
1982
1983 int cg_mask_supported(CGroupMask *ret) {
1984 CGroupMask mask = 0;
1985 int r, unified;
1986
1987 /* Determines the mask of supported cgroup controllers. Only
1988 * includes controllers we can make sense of and that are
1989 * actually accessible. */
1990
1991 unified = cg_unified();
1992 if (unified < 0)
1993 return unified;
1994 if (unified > 0) {
1995 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
1996 const char *c;
1997
1998 /* In the unified hierarchy we can read the supported
1999 * and accessible controllers from a the top-level
2000 * cgroup attribute */
2001
2002 r = cg_get_root_path(&root);
2003 if (r < 0)
2004 return r;
2005
2006 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2007 if (r < 0)
2008 return r;
2009
2010 r = read_one_line_file(path, &controllers);
2011 if (r < 0)
2012 return r;
2013
2014 c = controllers;
2015 for (;;) {
2016 _cleanup_free_ char *n = NULL;
2017 CGroupController v;
2018
2019 r = extract_first_word(&c, &n, NULL, 0);
2020 if (r < 0)
2021 return r;
2022 if (r == 0)
2023 break;
2024
2025 v = cgroup_controller_from_string(n);
2026 if (v < 0)
2027 continue;
2028
2029 mask |= CGROUP_CONTROLLER_TO_MASK(v);
2030 }
2031
2032 /* Currently, we only support the memory and pids
2033 * controller in the unified hierarchy, mask
2034 * everything else off. */
2035 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_PIDS;
2036
2037 } else {
2038 CGroupController c;
2039
2040 /* In the legacy hierarchy, we check whether which
2041 * hierarchies are mounted. */
2042
2043 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2044 const char *n;
2045
2046 n = cgroup_controller_to_string(c);
2047 if (controller_is_accessible(n) >= 0)
2048 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2049 }
2050 }
2051
2052 *ret = mask;
2053 return 0;
2054 }
2055
2056 int cg_kernel_controllers(Set *controllers) {
2057 _cleanup_fclose_ FILE *f = NULL;
2058 char buf[LINE_MAX];
2059 int r;
2060
2061 assert(controllers);
2062
2063 /* Determines the full list of kernel-known controllers. Might
2064 * include controllers we don't actually support, arbitrary
2065 * named hierarchies and controllers that aren't currently
2066 * accessible (because not mounted). */
2067
2068 f = fopen("/proc/cgroups", "re");
2069 if (!f) {
2070 if (errno == ENOENT)
2071 return 0;
2072 return -errno;
2073 }
2074
2075 /* Ignore the header line */
2076 (void) fgets(buf, sizeof(buf), f);
2077
2078 for (;;) {
2079 char *controller;
2080 int enabled = 0;
2081
2082 errno = 0;
2083 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2084
2085 if (feof(f))
2086 break;
2087
2088 if (ferror(f) && errno != 0)
2089 return -errno;
2090
2091 return -EBADMSG;
2092 }
2093
2094 if (!enabled) {
2095 free(controller);
2096 continue;
2097 }
2098
2099 if (!cg_controller_is_valid(controller)) {
2100 free(controller);
2101 return -EBADMSG;
2102 }
2103
2104 r = set_consume(controllers, controller);
2105 if (r < 0)
2106 return r;
2107 }
2108
2109 return 0;
2110 }
2111
2112 static thread_local int unified_cache = -1;
2113
2114 int cg_unified(void) {
2115 struct statfs fs;
2116
2117 /* Checks if we support the unified hierarchy. Returns an
2118 * error when the cgroup hierarchies aren't mounted yet or we
2119 * have any other trouble determining if the unified hierarchy
2120 * is supported. */
2121
2122 if (unified_cache >= 0)
2123 return unified_cache;
2124
2125 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2126 return -errno;
2127
2128 if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2129 unified_cache = true;
2130 else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2131 unified_cache = false;
2132 else
2133 return -ENOEXEC;
2134
2135 return unified_cache;
2136 }
2137
2138 void cg_unified_flush(void) {
2139 unified_cache = -1;
2140 }
2141
2142 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2143 _cleanup_free_ char *fs = NULL;
2144 CGroupController c;
2145 int r, unified;
2146
2147 assert(p);
2148
2149 if (supported == 0)
2150 return 0;
2151
2152 unified = cg_unified();
2153 if (unified < 0)
2154 return unified;
2155 if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2156 return 0;
2157
2158 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2159 if (r < 0)
2160 return r;
2161
2162 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2163 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2164 const char *n;
2165
2166 if (!(supported & bit))
2167 continue;
2168
2169 n = cgroup_controller_to_string(c);
2170 {
2171 char s[1 + strlen(n) + 1];
2172
2173 s[0] = mask & bit ? '+' : '-';
2174 strcpy(s + 1, n);
2175
2176 r = write_string_file(fs, s, 0);
2177 if (r < 0)
2178 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2179 }
2180 }
2181
2182 return 0;
2183 }
2184
2185 bool cg_is_unified_wanted(void) {
2186 static thread_local int wanted = -1;
2187 int r, unified;
2188
2189 /* If the hierarchy is already mounted, then follow whatever
2190 * was chosen for it. */
2191 unified = cg_unified();
2192 if (unified >= 0)
2193 return unified;
2194
2195 /* Otherwise, let's see what the kernel command line has to
2196 * say. Since checking that is expensive, let's cache the
2197 * result. */
2198 if (wanted >= 0)
2199 return wanted;
2200
2201 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2202 if (r > 0)
2203 return (wanted = true);
2204 else {
2205 _cleanup_free_ char *value = NULL;
2206
2207 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2208 if (r < 0)
2209 return false;
2210 if (r == 0)
2211 return (wanted = false);
2212
2213 return (wanted = parse_boolean(value) > 0);
2214 }
2215 }
2216
/* The inverse of cg_is_unified_wanted(): true if the legacy cgroup
 * hierarchy shall be used. */
bool cg_is_legacy_wanted(void) {
        bool unified_wanted;

        unified_wanted = cg_is_unified_wanted();

        return !unified_wanted;
}
2220
2221 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2222 uint64_t u;
2223 int r;
2224
2225 if (isempty(s)) {
2226 *ret = CGROUP_CPU_SHARES_INVALID;
2227 return 0;
2228 }
2229
2230 r = safe_atou64(s, &u);
2231 if (r < 0)
2232 return r;
2233
2234 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2235 return -ERANGE;
2236
2237 *ret = u;
2238 return 0;
2239 }
2240
2241 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2242 uint64_t u;
2243 int r;
2244
2245 if (isempty(s)) {
2246 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2247 return 0;
2248 }
2249
2250 r = safe_atou64(s, &u);
2251 if (r < 0)
2252 return r;
2253
2254 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2255 return -ERANGE;
2256
2257 *ret = u;
2258 return 0;
2259 }
2260
2261 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2262 [CGROUP_CONTROLLER_CPU] = "cpu",
2263 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2264 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2265 [CGROUP_CONTROLLER_MEMORY] = "memory",
2266 [CGROUP_CONTROLLER_DEVICES] = "devices",
2267 [CGROUP_CONTROLLER_PIDS] = "pids",
2268 [CGROUP_CONTROLLER_NET_CLS] = "net_cls",
2269 };
2270
2271 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);