]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/cgroup-util.c
core: make SYSTEMD_CGROUP_CONTROLLER a special string
[thirdparty/systemd.git] / src / basic / cgroup-util.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <dirent.h>
21 #include <errno.h>
22 #include <ftw.h>
23 #include <limits.h>
24 #include <signal.h>
25 #include <stddef.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/stat.h>
29 #include <sys/statfs.h>
30 #include <sys/types.h>
31 #include <sys/xattr.h>
32 #include <unistd.h>
33
34 #include "alloc-util.h"
35 #include "cgroup-util.h"
36 #include "def.h"
37 #include "dirent-util.h"
38 #include "extract-word.h"
39 #include "fd-util.h"
40 #include "fileio.h"
41 #include "format-util.h"
42 #include "fs-util.h"
43 #include "log.h"
44 #include "login-util.h"
45 #include "macro.h"
46 #include "missing.h"
47 #include "mkdir.h"
48 #include "parse-util.h"
49 #include "path-util.h"
50 #include "proc-cmdline.h"
51 #include "process-util.h"
52 #include "set.h"
53 #include "special.h"
54 #include "stat-util.h"
55 #include "stdio-util.h"
56 #include "string-table.h"
57 #include "string-util.h"
58 #include "unit-name.h"
59 #include "user-util.h"
60
61 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
62 _cleanup_free_ char *fs = NULL;
63 FILE *f;
64 int r;
65
66 assert(_f);
67
68 r = cg_get_path(controller, path, "cgroup.procs", &fs);
69 if (r < 0)
70 return r;
71
72 f = fopen(fs, "re");
73 if (!f)
74 return -errno;
75
76 *_f = f;
77 return 0;
78 }
79
80 int cg_read_pid(FILE *f, pid_t *_pid) {
81 unsigned long ul;
82
83 /* Note that the cgroup.procs might contain duplicates! See
84 * cgroups.txt for details. */
85
86 assert(f);
87 assert(_pid);
88
89 errno = 0;
90 if (fscanf(f, "%lu", &ul) != 1) {
91
92 if (feof(f))
93 return 0;
94
95 return errno > 0 ? -errno : -EIO;
96 }
97
98 if (ul <= 0)
99 return -EIO;
100
101 *_pid = (pid_t) ul;
102 return 1;
103 }
104
105 int cg_read_event(const char *controller, const char *path, const char *event,
106 char **val)
107 {
108 _cleanup_free_ char *events = NULL, *content = NULL;
109 char *p, *line;
110 int r;
111
112 r = cg_get_path(controller, path, "cgroup.events", &events);
113 if (r < 0)
114 return r;
115
116 r = read_full_file(events, &content, NULL);
117 if (r < 0)
118 return r;
119
120 p = content;
121 while ((line = strsep(&p, "\n"))) {
122 char *key;
123
124 key = strsep(&line, " ");
125 if (!key || !line)
126 return -EINVAL;
127
128 if (strcmp(key, event))
129 continue;
130
131 *val = strdup(line);
132 return 0;
133 }
134
135 return -ENOENT;
136 }
137
/* Reports whether /proc/self/ns/cgroup exists. The probe is done with access()
 * on the first call only; its result is cached in a thread-local variable and
 * returned on every later call from the same thread. */
bool cg_ns_supported(void) {
        static thread_local int enabled = -1;

        if (enabled < 0)
                enabled = access("/proc/self/ns/cgroup", F_OK) == 0 ? 1 : 0;

        return enabled;
}
151
152 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
153 _cleanup_free_ char *fs = NULL;
154 int r;
155 DIR *d;
156
157 assert(_d);
158
159 /* This is not recursive! */
160
161 r = cg_get_path(controller, path, NULL, &fs);
162 if (r < 0)
163 return r;
164
165 d = opendir(fs);
166 if (!d)
167 return -errno;
168
169 *_d = d;
170 return 0;
171 }
172
173 int cg_read_subgroup(DIR *d, char **fn) {
174 struct dirent *de;
175
176 assert(d);
177 assert(fn);
178
179 FOREACH_DIRENT_ALL(de, d, return -errno) {
180 char *b;
181
182 if (de->d_type != DT_DIR)
183 continue;
184
185 if (dot_or_dot_dot(de->d_name))
186 continue;
187
188 b = strdup(de->d_name);
189 if (!b)
190 return -ENOMEM;
191
192 *fn = b;
193 return 1;
194 }
195
196 return 0;
197 }
198
199 int cg_rmdir(const char *controller, const char *path) {
200 _cleanup_free_ char *p = NULL;
201 int r;
202
203 r = cg_get_path(controller, path, NULL, &p);
204 if (r < 0)
205 return r;
206
207 r = rmdir(p);
208 if (r < 0 && errno != ENOENT)
209 return -errno;
210
211 return 0;
212 }
213
/* Sends signal "sig" to every process listed in the "cgroup.procs" file of the
 * given cgroup, re-reading the file until a pass finds no PID that was not
 * already handled.
 *
 * "s" is the set of PIDs already handled; PIDs found in it are skipped and
 * every PID processed here is added to it. If "s" is NULL a temporary set is
 * allocated for the duration of the call.
 *
 * Flags evaluated here:
 *   CGROUP_IGNORE_SELF - skip our own PID
 *   CGROUP_SIGCONT     - follow each successful kill() with SIGCONT (dropped
 *                        when "sig" itself is SIGCONT or SIGKILL)
 *
 * If "log_kill" is non-NULL it is invoked with (pid, sig, userdata) before
 * each kill().
 *
 * Returns 1 if at least one kill() succeeded and no error was recorded, 0 if
 * nothing was signalled, or a negative errno-style error. The first kill()
 * failure other than ESRCH is remembered and takes precedence over later
 * errors. A cgroup.procs file that cannot be opened with ENOENT is not treated
 * as an error. */
int cg_kill(
                const char *controller,
                const char *path,
                int sig,
                CGroupFlags flags,
                Set *s,
                cg_kill_log_func_t log_kill,
                void *userdata) {

        _cleanup_set_free_ Set *allocated_set = NULL;
        bool done = false;
        int r, ret = 0;
        pid_t my_pid;

        assert(sig >= 0);

        /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
         * SIGCONT on SIGKILL. */
        if (IN_SET(sig, SIGCONT, SIGKILL))
                flags &= ~CGROUP_SIGCONT;

        /* This goes through the tasks list and kills them all. This
         * is repeated until no further processes are added to the
         * tasks list, to properly handle forking processes */

        if (!s) {
                s = allocated_set = set_new(NULL);
                if (!s)
                        return -ENOMEM;
        }

        my_pid = getpid();

        do {
                _cleanup_fclose_ FILE *f = NULL;
                pid_t pid = 0;
                done = true;

                r = cg_enumerate_processes(controller, path, &f);
                if (r < 0) {
                        /* An earlier recorded error wins; a vanished cgroup is not an error */
                        if (ret >= 0 && r != -ENOENT)
                                return r;

                        return ret;
                }

                while ((r = cg_read_pid(f, &pid)) > 0) {

                        if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
                                continue;

                        /* Already handled in an earlier pass (or by the caller) */
                        if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
                                continue;

                        if (log_kill)
                                log_kill(pid, sig, userdata);

                        /* If we haven't killed this process yet, kill
                         * it */
                        if (kill(pid, sig) < 0) {
                                /* Remember only the first error, and ignore processes that are already gone */
                                if (ret >= 0 && errno != ESRCH)
                                        ret = -errno;
                        } else {
                                if (flags & CGROUP_SIGCONT)
                                        (void) kill(pid, SIGCONT);

                                if (ret == 0)
                                        ret = 1;
                        }

                        /* We saw a new PID, so another pass over the file is needed */
                        done = false;

                        r = set_put(s, PID_TO_PTR(pid));
                        if (r < 0) {
                                if (ret >= 0)
                                        return r;

                                return ret;
                        }
                }

                /* cg_read_pid() failed (as opposed to hitting end of file) */
                if (r < 0) {
                        if (ret >= 0)
                                return r;

                        return ret;
                }

                /* To avoid racing against processes which fork
                 * quicker than we can kill them we repeat this until
                 * no new pids need to be killed. */

        } while (!done);

        return ret;
}
310
311 int cg_kill_recursive(
312 const char *controller,
313 const char *path,
314 int sig,
315 CGroupFlags flags,
316 Set *s,
317 cg_kill_log_func_t log_kill,
318 void *userdata) {
319
320 _cleanup_set_free_ Set *allocated_set = NULL;
321 _cleanup_closedir_ DIR *d = NULL;
322 int r, ret;
323 char *fn;
324
325 assert(path);
326 assert(sig >= 0);
327
328 if (!s) {
329 s = allocated_set = set_new(NULL);
330 if (!s)
331 return -ENOMEM;
332 }
333
334 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
335
336 r = cg_enumerate_subgroups(controller, path, &d);
337 if (r < 0) {
338 if (ret >= 0 && r != -ENOENT)
339 return r;
340
341 return ret;
342 }
343
344 while ((r = cg_read_subgroup(d, &fn)) > 0) {
345 _cleanup_free_ char *p = NULL;
346
347 p = strjoin(path, "/", fn);
348 free(fn);
349 if (!p)
350 return -ENOMEM;
351
352 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
353 if (r != 0 && ret >= 0)
354 ret = r;
355 }
356 if (ret >= 0 && r < 0)
357 ret = r;
358
359 if (flags & CGROUP_REMOVE) {
360 r = cg_rmdir(controller, path);
361 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
362 return r;
363 }
364
365 return ret;
366 }
367
/* Moves every process listed in the "cgroup.procs" file of the cgroup
 * cfrom:pfrom into the cgroup cto:pto using cg_attach(), re-reading the file
 * until a pass finds no PID that was not already handled.
 *
 * With CGROUP_IGNORE_SELF set in "flags" our own PID is skipped. When the
 * source path is empty or "/", PIDs for which is_kernel_thread() returns a
 * positive value are skipped as well.
 *
 * Returns 1 if at least one process was attached and no error was recorded, 0
 * if nothing was moved, or a negative errno-style error. The first
 * cg_attach() failure other than -ESRCH is remembered and takes precedence
 * over later errors. A source cgroup.procs file that cannot be opened with
 * ENOENT is not treated as an error. */
int cg_migrate(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        bool done = false;
        _cleanup_set_free_ Set *s = NULL;
        int r, ret = 0;
        pid_t my_pid;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        /* Set of PIDs we already tried to move */
        s = set_new(NULL);
        if (!s)
                return -ENOMEM;

        my_pid = getpid();

        do {
                _cleanup_fclose_ FILE *f = NULL;
                pid_t pid = 0;
                done = true;

                r = cg_enumerate_processes(cfrom, pfrom, &f);
                if (r < 0) {
                        /* An earlier recorded error wins; a vanished cgroup is not an error */
                        if (ret >= 0 && r != -ENOENT)
                                return r;

                        return ret;
                }

                while ((r = cg_read_pid(f, &pid)) > 0) {

                        /* This might do weird stuff if we aren't a
                         * single-threaded program. However, we
                         * luckily know we are not */
                        if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
                                continue;

                        /* Already handled in an earlier pass */
                        if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
                                continue;

                        /* Ignore kernel threads. Since they can only
                         * exist in the root cgroup, we only check for
                         * them there. */
                        if (cfrom &&
                            (isempty(pfrom) || path_equal(pfrom, "/")) &&
                            is_kernel_thread(pid) > 0)
                                continue;

                        r = cg_attach(cto, pto, pid);
                        if (r < 0) {
                                /* Remember only the first error, and ignore processes that are already gone */
                                if (ret >= 0 && r != -ESRCH)
                                        ret = r;
                        } else if (ret == 0)
                                ret = 1;

                        /* We saw a new PID, so another pass over the file is needed */
                        done = false;

                        r = set_put(s, PID_TO_PTR(pid));
                        if (r < 0) {
                                if (ret >= 0)
                                        return r;

                                return ret;
                        }
                }

                /* cg_read_pid() failed (as opposed to hitting end of file) */
                if (r < 0) {
                        if (ret >= 0)
                                return r;

                        return ret;
                }
        } while (!done);

        return ret;
}
451
452 int cg_migrate_recursive(
453 const char *cfrom,
454 const char *pfrom,
455 const char *cto,
456 const char *pto,
457 CGroupFlags flags) {
458
459 _cleanup_closedir_ DIR *d = NULL;
460 int r, ret = 0;
461 char *fn;
462
463 assert(cfrom);
464 assert(pfrom);
465 assert(cto);
466 assert(pto);
467
468 ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
469
470 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
471 if (r < 0) {
472 if (ret >= 0 && r != -ENOENT)
473 return r;
474
475 return ret;
476 }
477
478 while ((r = cg_read_subgroup(d, &fn)) > 0) {
479 _cleanup_free_ char *p = NULL;
480
481 p = strjoin(pfrom, "/", fn);
482 free(fn);
483 if (!p)
484 return -ENOMEM;
485
486 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
487 if (r != 0 && ret >= 0)
488 ret = r;
489 }
490
491 if (r < 0 && ret >= 0)
492 ret = r;
493
494 if (flags & CGROUP_REMOVE) {
495 r = cg_rmdir(cfrom, pfrom);
496 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
497 return r;
498 }
499
500 return ret;
501 }
502
503 int cg_migrate_recursive_fallback(
504 const char *cfrom,
505 const char *pfrom,
506 const char *cto,
507 const char *pto,
508 CGroupFlags flags) {
509
510 int r;
511
512 assert(cfrom);
513 assert(pfrom);
514 assert(cto);
515 assert(pto);
516
517 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
518 if (r < 0) {
519 char prefix[strlen(pto) + 1];
520
521 /* This didn't work? Then let's try all prefixes of the destination */
522
523 PATH_FOREACH_PREFIX(prefix, pto) {
524 int q;
525
526 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
527 if (q >= 0)
528 return q;
529 }
530 }
531
532 return r;
533 }
534
535 static const char *controller_to_dirname(const char *controller) {
536 const char *e;
537
538 assert(controller);
539
540 /* Converts a controller name to the directory name below
541 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
542 * just cuts off the name= prefixed used for named
543 * hierarchies, if it is specified. */
544
545 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
546 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
547
548 e = startswith(controller, "name=");
549 if (e)
550 return e;
551
552 return controller;
553 }
554
555 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
556 const char *dn;
557 char *t = NULL;
558
559 assert(fs);
560 assert(controller);
561
562 dn = controller_to_dirname(controller);
563
564 if (isempty(path) && isempty(suffix))
565 t = strappend("/sys/fs/cgroup/", dn);
566 else if (isempty(path))
567 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
568 else if (isempty(suffix))
569 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
570 else
571 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
572 if (!t)
573 return -ENOMEM;
574
575 *fs = t;
576 return 0;
577 }
578
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]", leaving out "path"/"suffix"
 * components that are empty or NULL.
 *
 * Returns 0 and stores the newly allocated string in *fs, or -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        char *joined;

        assert(fs);

        if (isempty(path)) {
                if (isempty(suffix))
                        joined = strdup("/sys/fs/cgroup");
                else
                        joined = strappend("/sys/fs/cgroup/", suffix);
        } else if (isempty(suffix))
                joined = strappend("/sys/fs/cgroup/", path);
        else
                joined = strjoin("/sys/fs/cgroup/", path, "/", suffix);

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
598
/* Computes the file system path for a cgroup (and optionally an attribute file
 * "suffix" inside it).
 *
 * Without a controller the result is just "path" and "suffix" joined with "/",
 * i.e. the path below the controller mount without any prefix; at least one of
 * the two must be given in that case. With a controller, the path is built
 * below /sys/fs/cgroup by join_path_unified() or join_path_legacy(), depending
 * on cg_all_unified(). The result is passed through path_kill_slashes().
 *
 * Returns 0 and stores the newly allocated path in *fs, -EINVAL for an invalid
 * controller name or if nothing was specified at all, or -ENOMEM. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int r;

        assert(fs);

        if (!controller) {
                char *t;

                if (!path && !suffix)
                        return -EINVAL;

                if (path && suffix)
                        t = strjoin(path, "/", suffix);
                else
                        /* Exactly one of the two is set, copy that one */
                        t = strdup(path ? path : suffix);
                if (!t)
                        return -ENOMEM;

                *fs = path_kill_slashes(t);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified()
                ? join_path_unified(path, suffix, fs)
                : join_path_legacy(controller, path, suffix, fs);
        if (r < 0)
                return r;

        path_kill_slashes(*fs);
        return 0;
}
639
640 static int controller_is_accessible(const char *controller) {
641
642 assert(controller);
643
644 /* Checks whether a specific controller is accessible,
645 * i.e. its hierarchy mounted. In the unified hierarchy all
646 * controllers are considered accessible, except for the named
647 * hierarchies */
648
649 if (!cg_controller_is_valid(controller))
650 return -EINVAL;
651
652 if (cg_all_unified()) {
653 /* We don't support named hierarchies if we are using
654 * the unified hierarchy. */
655
656 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
657 return 0;
658
659 if (startswith(controller, "name="))
660 return -EOPNOTSUPP;
661
662 } else {
663 const char *cc, *dn;
664
665 dn = controller_to_dirname(controller);
666 cc = strjoina("/sys/fs/cgroup/", dn);
667
668 if (laccess(cc, F_OK) < 0)
669 return -errno;
670 }
671
672 return 0;
673 }
674
/* Same as cg_get_path(), but first verifies with controller_is_accessible()
 * that the controller can be used, and returns that check's error if not. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
688
689 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
690 assert(path);
691 assert(sb);
692 assert(ftwbuf);
693
694 if (typeflag != FTW_DP)
695 return 0;
696
697 if (ftwbuf->level < 1)
698 return 0;
699
700 (void) rmdir(path);
701 return 0;
702 }
703
704 int cg_trim(const char *controller, const char *path, bool delete_root) {
705 _cleanup_free_ char *fs = NULL;
706 int r = 0;
707
708 assert(path);
709
710 r = cg_get_path(controller, path, NULL, &fs);
711 if (r < 0)
712 return r;
713
714 errno = 0;
715 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
716 if (errno == ENOENT)
717 r = 0;
718 else if (errno > 0)
719 r = -errno;
720 else
721 r = -EIO;
722 }
723
724 if (delete_root) {
725 if (rmdir(fs) < 0 && errno != ENOENT)
726 return -errno;
727 }
728
729 return r;
730 }
731
732 int cg_create(const char *controller, const char *path) {
733 _cleanup_free_ char *fs = NULL;
734 int r;
735
736 r = cg_get_path_and_check(controller, path, NULL, &fs);
737 if (r < 0)
738 return r;
739
740 r = mkdir_parents(fs, 0755);
741 if (r < 0)
742 return r;
743
744 if (mkdir(fs, 0755) < 0) {
745
746 if (errno == EEXIST)
747 return 0;
748
749 return -errno;
750 }
751
752 return 1;
753 }
754
/* Creates the given cgroup with cg_create() and moves "pid" into it with
 * cg_attach().
 *
 * Returns the result of cg_create() on success (so 1 if the cgroup was newly
 * created, 0 if it existed already) or the negative error of whichever step
 * failed. The cgroup is not removed again if attaching fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);
        if (attached < 0)
                return attached;

        return created;
}
771
772 int cg_attach(const char *controller, const char *path, pid_t pid) {
773 _cleanup_free_ char *fs = NULL;
774 char c[DECIMAL_STR_MAX(pid_t) + 2];
775 int r;
776
777 assert(path);
778 assert(pid >= 0);
779
780 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
781 if (r < 0)
782 return r;
783
784 if (pid == 0)
785 pid = getpid();
786
787 xsprintf(c, PID_FMT "\n", pid);
788
789 return write_string_file(fs, c, 0);
790 }
791
792 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
793 int r;
794
795 assert(controller);
796 assert(path);
797 assert(pid >= 0);
798
799 r = cg_attach(controller, path, pid);
800 if (r < 0) {
801 char prefix[strlen(path) + 1];
802
803 /* This didn't work? Then let's try all prefixes of
804 * the destination */
805
806 PATH_FOREACH_PREFIX(prefix, path) {
807 int q;
808
809 q = cg_attach(controller, prefix, pid);
810 if (q >= 0)
811 return q;
812 }
813 }
814
815 return r;
816 }
817
818 int cg_set_group_access(
819 const char *controller,
820 const char *path,
821 mode_t mode,
822 uid_t uid,
823 gid_t gid) {
824
825 _cleanup_free_ char *fs = NULL;
826 int r;
827
828 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
829 return 0;
830
831 if (mode != MODE_INVALID)
832 mode &= 0777;
833
834 r = cg_get_path(controller, path, NULL, &fs);
835 if (r < 0)
836 return r;
837
838 return chmod_and_chown(fs, mode, uid, gid);
839 }
840
841 int cg_set_task_access(
842 const char *controller,
843 const char *path,
844 mode_t mode,
845 uid_t uid,
846 gid_t gid) {
847
848 _cleanup_free_ char *fs = NULL, *procs = NULL;
849 int r;
850
851 assert(path);
852
853 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
854 return 0;
855
856 if (mode != MODE_INVALID)
857 mode &= 0666;
858
859 r = cg_get_path(controller, path, "cgroup.procs", &fs);
860 if (r < 0)
861 return r;
862
863 r = chmod_and_chown(fs, mode, uid, gid);
864 if (r < 0)
865 return r;
866
867 if (cg_unified(controller))
868 return 0;
869
870 /* Compatibility, Always keep values for "tasks" in sync with
871 * "cgroup.procs" */
872 if (cg_get_path(controller, path, "tasks", &procs) >= 0)
873 (void) chmod_and_chown(procs, mode, uid, gid);
874
875 return 0;
876 }
877
878 int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
879 _cleanup_free_ char *fs = NULL;
880 int r;
881
882 assert(path);
883 assert(name);
884 assert(value || size <= 0);
885
886 r = cg_get_path(controller, path, NULL, &fs);
887 if (r < 0)
888 return r;
889
890 if (setxattr(fs, name, value, size, flags) < 0)
891 return -errno;
892
893 return 0;
894 }
895
896 int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
897 _cleanup_free_ char *fs = NULL;
898 ssize_t n;
899 int r;
900
901 assert(path);
902 assert(name);
903
904 r = cg_get_path(controller, path, NULL, &fs);
905 if (r < 0)
906 return r;
907
908 n = getxattr(fs, name, value, size);
909 if (n < 0)
910 return -errno;
911
912 return (int) n;
913 }
914
/* Determines the cgroup path of process "pid" in the hierarchy of the given
 * controller by parsing the process' "cgroup" file in procfs. A NULL
 * controller means SYSTEMD_CGROUP_CONTROLLER.
 *
 * Each line is treated as "<id>:<controller list>:<path>". If cg_unified() is
 * true for the controller, the line starting with "0:" is used. Otherwise the
 * first line whose comma-separated controller list contains the controller
 * name is used (SYSTEMD_CGROUP_CONTROLLER is looked up under the name
 * SYSTEMD_CGROUP_CONTROLLER_LEGACY).
 *
 * Returns 0 and stores a newly allocated copy of the path in *path. Errors:
 * -EINVAL for an invalid controller name, -ESRCH if the procfs file does not
 * exist, -errno for other open/read failures, -ENOMEM, and -ENODATA if no
 * matching line was found. */
int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
        _cleanup_fclose_ FILE *f = NULL;
        char line[LINE_MAX];
        const char *fs, *controller_str;
        size_t cs = 0;
        bool unified;

        assert(path);
        assert(pid >= 0);

        if (controller) {
                if (!cg_controller_is_valid(controller))
                        return -EINVAL;
        } else
                controller = SYSTEMD_CGROUP_CONTROLLER;

        unified = cg_unified(controller);
        if (!unified) {
                /* controller_str/cs are only set up, and only used, in the non-unified case */
                if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
                        controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
                else
                        controller_str = controller;

                cs = strlen(controller_str);
        }

        fs = procfs_file_alloca(pid, "cgroup");
        f = fopen(fs, "re");
        if (!f)
                return errno == ENOENT ? -ESRCH : -errno;

        FOREACH_LINE(line, f, return -errno) {
                char *e, *p;

                truncate_nl(line);

                if (unified) {
                        e = startswith(line, "0:");
                        if (!e)
                                continue;

                        /* Find the colon separating the controller list from the path */
                        e = strchr(e, ':');
                        if (!e)
                                continue;
                } else {
                        char *l;
                        size_t k;
                        const char *word, *state;
                        bool found = false;

                        /* l: start of the controller list, right after the first colon */
                        l = strchr(line, ':');
                        if (!l)
                                continue;

                        l++;
                        /* e: the colon in front of the path */
                        e = strchr(l, ':');
                        if (!e)
                                continue;

                        /* Terminate the controller list in place so it can be split on "," */
                        *e = 0;
                        FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
                                if (k == cs && memcmp(word, controller_str, cs) == 0) {
                                        found = true;
                                        break;
                                }
                        }

                        if (!found)
                                continue;
                }

                /* Everything after the second colon is the path */
                p = strdup(e + 1);
                if (!p)
                        return -ENOMEM;

                *path = p;
                return 0;
        }

        return -ENODATA;
}
996
997 int cg_install_release_agent(const char *controller, const char *agent) {
998 _cleanup_free_ char *fs = NULL, *contents = NULL;
999 const char *sc;
1000 int r;
1001
1002 assert(agent);
1003
1004 if (cg_unified(controller)) /* doesn't apply to unified hierarchy */
1005 return -EOPNOTSUPP;
1006
1007 r = cg_get_path(controller, NULL, "release_agent", &fs);
1008 if (r < 0)
1009 return r;
1010
1011 r = read_one_line_file(fs, &contents);
1012 if (r < 0)
1013 return r;
1014
1015 sc = strstrip(contents);
1016 if (isempty(sc)) {
1017 r = write_string_file(fs, agent, 0);
1018 if (r < 0)
1019 return r;
1020 } else if (!path_equal(sc, agent))
1021 return -EEXIST;
1022
1023 fs = mfree(fs);
1024 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1025 if (r < 0)
1026 return r;
1027
1028 contents = mfree(contents);
1029 r = read_one_line_file(fs, &contents);
1030 if (r < 0)
1031 return r;
1032
1033 sc = strstrip(contents);
1034 if (streq(sc, "0")) {
1035 r = write_string_file(fs, "1", 0);
1036 if (r < 0)
1037 return r;
1038
1039 return 1;
1040 }
1041
1042 if (!streq(sc, "1"))
1043 return -EIO;
1044
1045 return 0;
1046 }
1047
1048 int cg_uninstall_release_agent(const char *controller) {
1049 _cleanup_free_ char *fs = NULL;
1050 int r;
1051
1052 if (cg_unified(controller)) /* Doesn't apply to unified hierarchy */
1053 return -EOPNOTSUPP;
1054
1055 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1056 if (r < 0)
1057 return r;
1058
1059 r = write_string_file(fs, "0", 0);
1060 if (r < 0)
1061 return r;
1062
1063 fs = mfree(fs);
1064
1065 r = cg_get_path(controller, NULL, "release_agent", &fs);
1066 if (r < 0)
1067 return r;
1068
1069 r = write_string_file(fs, "", 0);
1070 if (r < 0)
1071 return r;
1072
1073 return 0;
1074 }
1075
1076 int cg_is_empty(const char *controller, const char *path) {
1077 _cleanup_fclose_ FILE *f = NULL;
1078 pid_t pid;
1079 int r;
1080
1081 assert(path);
1082
1083 r = cg_enumerate_processes(controller, path, &f);
1084 if (r == -ENOENT)
1085 return 1;
1086 if (r < 0)
1087 return r;
1088
1089 r = cg_read_pid(f, &pid);
1090 if (r < 0)
1091 return r;
1092
1093 return r == 0;
1094 }
1095
1096 int cg_is_empty_recursive(const char *controller, const char *path) {
1097 int r;
1098
1099 assert(path);
1100
1101 /* The root cgroup is always populated */
1102 if (controller && (isempty(path) || path_equal(path, "/")))
1103 return false;
1104
1105 if (cg_unified(controller)) {
1106 _cleanup_free_ char *t = NULL;
1107
1108 /* On the unified hierarchy we can check empty state
1109 * via the "populated" attribute of "cgroup.events". */
1110
1111 r = cg_read_event(controller, path, "populated", &t);
1112 if (r < 0)
1113 return r;
1114
1115 return streq(t, "0");
1116 } else {
1117 _cleanup_closedir_ DIR *d = NULL;
1118 char *fn;
1119
1120 r = cg_is_empty(controller, path);
1121 if (r <= 0)
1122 return r;
1123
1124 r = cg_enumerate_subgroups(controller, path, &d);
1125 if (r == -ENOENT)
1126 return 1;
1127 if (r < 0)
1128 return r;
1129
1130 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1131 _cleanup_free_ char *p = NULL;
1132
1133 p = strjoin(path, "/", fn);
1134 free(fn);
1135 if (!p)
1136 return -ENOMEM;
1137
1138 r = cg_is_empty_recursive(controller, p);
1139 if (r <= 0)
1140 return r;
1141 }
1142 if (r < 0)
1143 return r;
1144
1145 return true;
1146 }
1147 }
1148
/* Splits a cgroup specification into controller and path. Three forms are
 * understood:
 *
 *   "/some/path"        - path only; *controller is set to NULL
 *   "controller"        - controller only; *path is set to NULL
 *   "controller:/path"  - both; an empty part after the colon gives a NULL path
 *
 * Either output pointer may be NULL if the caller is not interested in that
 * part. Returned strings are newly allocated; paths are passed through
 * path_kill_slashes().
 *
 * Returns 0 on success, -EINVAL if the controller name is invalid or the path
 * is not safe (or, in the third form, not absolute), or -ENOMEM. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *ctrl = NULL, *pth = NULL;
        const char *colon;

        assert(spec);

        /* Form 1: just a path */
        if (*spec == '/') {
                if (!path_is_safe(spec))
                        return -EINVAL;

                if (path) {
                        pth = strdup(spec);
                        if (!pth)
                                return -ENOMEM;

                        *path = path_kill_slashes(pth);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        colon = strchr(spec, ':');

        /* Form 2: just a controller */
        if (!colon) {
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        ctrl = strdup(spec);
                        if (!ctrl)
                                return -ENOMEM;

                        *controller = ctrl;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Form 3: controller and path, separated by a colon */
        ctrl = strndup(spec, colon-spec);
        if (!ctrl)
                return -ENOMEM;
        if (!cg_controller_is_valid(ctrl)) {
                free(ctrl);
                return -EINVAL;
        }

        if (!isempty(colon+1)) {
                pth = strdup(colon+1);
                if (!pth) {
                        free(ctrl);
                        return -ENOMEM;
                }

                if (!path_is_safe(pth) ||
                    !path_is_absolute(pth)) {
                        free(ctrl);
                        free(pth);
                        return -EINVAL;
                }

                path_kill_slashes(pth);
        }

        /* Hand over what the caller asked for, drop the rest */
        if (controller)
                *controller = ctrl;
        else
                free(ctrl);

        if (path)
                *path = pth;
        else
                free(pth);

        return 0;
}
1231
1232 int cg_mangle_path(const char *path, char **result) {
1233 _cleanup_free_ char *c = NULL, *p = NULL;
1234 char *t;
1235 int r;
1236
1237 assert(path);
1238 assert(result);
1239
1240 /* First, check if it already is a filesystem path */
1241 if (path_startswith(path, "/sys/fs/cgroup")) {
1242
1243 t = strdup(path);
1244 if (!t)
1245 return -ENOMEM;
1246
1247 *result = path_kill_slashes(t);
1248 return 0;
1249 }
1250
1251 /* Otherwise, treat it as cg spec */
1252 r = cg_split_spec(path, &c, &p);
1253 if (r < 0)
1254 return r;
1255
1256 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1257 }
1258
1259 int cg_get_root_path(char **path) {
1260 char *p, *e;
1261 int r;
1262
1263 assert(path);
1264
1265 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1266 if (r < 0)
1267 return r;
1268
1269 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1270 if (!e)
1271 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1272 if (!e)
1273 e = endswith(p, "/system"); /* even more legacy */
1274 if (e)
1275 *e = 0;
1276
1277 *path = p;
1278 return 0;
1279 }
1280
1281 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1282 _cleanup_free_ char *rt = NULL;
1283 char *p;
1284 int r;
1285
1286 assert(cgroup);
1287 assert(shifted);
1288
1289 if (!root) {
1290 /* If the root was specified let's use that, otherwise
1291 * let's determine it from PID 1 */
1292
1293 r = cg_get_root_path(&rt);
1294 if (r < 0)
1295 return r;
1296
1297 root = rt;
1298 }
1299
1300 p = path_startswith(cgroup, root);
1301 if (p && p > cgroup)
1302 *shifted = p - 1;
1303 else
1304 *shifted = cgroup;
1305
1306 return 0;
1307 }
1308
1309 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1310 _cleanup_free_ char *raw = NULL;
1311 const char *c;
1312 int r;
1313
1314 assert(pid >= 0);
1315 assert(cgroup);
1316
1317 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1318 if (r < 0)
1319 return r;
1320
1321 r = cg_shift_path(raw, root, &c);
1322 if (r < 0)
1323 return r;
1324
1325 if (c == raw) {
1326 *cgroup = raw;
1327 raw = NULL;
1328 } else {
1329 char *n;
1330
1331 n = strdup(c);
1332 if (!n)
1333 return -ENOMEM;
1334
1335 *cgroup = n;
1336 }
1337
1338 return 0;
1339 }
1340
1341 int cg_path_decode_unit(const char *cgroup, char **unit) {
1342 char *c, *s;
1343 size_t n;
1344
1345 assert(cgroup);
1346 assert(unit);
1347
1348 n = strcspn(cgroup, "/");
1349 if (n < 3)
1350 return -ENXIO;
1351
1352 c = strndupa(cgroup, n);
1353 c = cg_unescape(c);
1354
1355 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1356 return -ENXIO;
1357
1358 s = strdup(c);
1359 if (!s)
1360 return -ENOMEM;
1361
1362 *unit = s;
1363 return 0;
1364 }
1365
1366 static bool valid_slice_name(const char *p, size_t n) {
1367
1368 if (!p)
1369 return false;
1370
1371 if (n < strlen("x.slice"))
1372 return false;
1373
1374 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1375 char buf[n+1], *c;
1376
1377 memcpy(buf, p, n);
1378 buf[n] = 0;
1379
1380 c = cg_unescape(buf);
1381
1382 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1383 }
1384
1385 return false;
1386 }
1387
/* Skips over all leading path components that valid_slice_name() accepts,
 * along with the slashes in front of them. Returns a pointer to the first
 * component that is not a slice (possibly the end of the string). */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t length;

                component = p + strspn(p, "/");
                length = strcspn(component, "/");

                if (!valid_slice_name(component, length))
                        return component;

                p = component + length;
        }
}
1405
/* Extracts the unit name from a cgroup path: leading slice components are
 * skipped and the next component is decoded with cg_path_decode_unit().
 *
 * Returns 0 and stores the newly allocated unit name in *ret, -ENXIO if the
 * decoded unit is itself a ".slice", or the error from
 * cg_path_decode_unit(). */
int cg_path_get_unit(const char *path, char **ret) {
        const char *after_slices;
        char *name;
        int q;

        assert(path);
        assert(ret);

        after_slices = skip_slices(path);

        q = cg_path_decode_unit(after_slices, &name);
        if (q < 0)
                return q;

        /* We skipped over the slices, don't accept any now */
        if (!endswith(name, ".slice")) {
                *ret = name;
                return 0;
        }

        free(name);
        return -ENXIO;
}
1429
1430 int cg_pid_get_unit(pid_t pid, char **unit) {
1431 _cleanup_free_ char *cgroup = NULL;
1432 int r;
1433
1434 assert(unit);
1435
1436 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1437 if (r < 0)
1438 return r;
1439
1440 return cg_path_get_unit(cgroup, unit);
1441 }
1442
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to what follows the "session-<id>.scope" component (and
 * any slashes after it), or NULL if "p" is empty, its first component does not
 * have that form, or <id> is rejected by session_id_valid().
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the ID between prefix and suffix, plus NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, so signal failure with
                 * NULL rather than with the boolean "false". */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1479
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the "user@<uid>.service" component (and
 * any slashes after it), or NULL if "p" is empty, its first component does not
 * have that form, or <uid> is rejected by parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + n - 8, ".service", 8) != 0)
                return NULL;

        {
                /* Room for the UID between prefix and suffix, plus NUL */
                char uid_text[n - 5 - 8 + 1];

                memcpy(uid_text, p + 5, n - 5 - 8);
                uid_text[n - 5 - 8] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(uid_text, NULL) < 0)
                        return NULL;
        }

        p += n;
        p += strspn(p, "/");

        return p;
}
1517
/* Skips the part of a cgroup path that precedes a user unit: any leading
 * slices, followed by either a "user@<uid>.service" component or, failing
 * that, a "session-<id>.scope" component. Returns NULL if neither follows the
 * slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Prefer the user manager if it's in the path now, otherwise try the
         * user session */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1534
/*
 * Extracts the user unit name from a cgroup path. Returns -ENXIO if the path
 * carries no user manager/session prefix, otherwise whatever
 * cg_path_get_unit() returns for the remainder.
 */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the user prefix the path looks pretty much the same as for a
         * system unit, hence the same parser is used from there on. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1550
1551 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1552 _cleanup_free_ char *cgroup = NULL;
1553 int r;
1554
1555 assert(unit);
1556
1557 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1558 if (r < 0)
1559 return r;
1560
1561 return cg_path_get_user_unit(cgroup, unit);
1562 }
1563
1564 int cg_path_get_machine_name(const char *path, char **machine) {
1565 _cleanup_free_ char *u = NULL;
1566 const char *sl;
1567 int r;
1568
1569 r = cg_path_get_unit(path, &u);
1570 if (r < 0)
1571 return r;
1572
1573 sl = strjoina("/run/systemd/machines/unit:", u);
1574 return readlink_malloc(sl, machine);
1575 }
1576
1577 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1578 _cleanup_free_ char *cgroup = NULL;
1579 int r;
1580
1581 assert(machine);
1582
1583 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1584 if (r < 0)
1585 return r;
1586
1587 return cg_path_get_machine_name(cgroup, machine);
1588 }
1589
1590 int cg_path_get_session(const char *path, char **session) {
1591 _cleanup_free_ char *unit = NULL;
1592 char *start, *end;
1593 int r;
1594
1595 assert(path);
1596
1597 r = cg_path_get_unit(path, &unit);
1598 if (r < 0)
1599 return r;
1600
1601 start = startswith(unit, "session-");
1602 if (!start)
1603 return -ENXIO;
1604 end = endswith(start, ".scope");
1605 if (!end)
1606 return -ENXIO;
1607
1608 *end = 0;
1609 if (!session_id_valid(start))
1610 return -ENXIO;
1611
1612 if (session) {
1613 char *rr;
1614
1615 rr = strdup(start);
1616 if (!rr)
1617 return -ENOMEM;
1618
1619 *session = rr;
1620 }
1621
1622 return 0;
1623 }
1624
1625 int cg_pid_get_session(pid_t pid, char **session) {
1626 _cleanup_free_ char *cgroup = NULL;
1627 int r;
1628
1629 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1630 if (r < 0)
1631 return r;
1632
1633 return cg_path_get_session(cgroup, session);
1634 }
1635
1636 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1637 _cleanup_free_ char *slice = NULL;
1638 char *start, *end;
1639 int r;
1640
1641 assert(path);
1642
1643 r = cg_path_get_slice(path, &slice);
1644 if (r < 0)
1645 return r;
1646
1647 start = startswith(slice, "user-");
1648 if (!start)
1649 return -ENXIO;
1650 end = endswith(start, ".slice");
1651 if (!end)
1652 return -ENXIO;
1653
1654 *end = 0;
1655 if (parse_uid(start, uid) < 0)
1656 return -ENXIO;
1657
1658 return 0;
1659 }
1660
1661 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1662 _cleanup_free_ char *cgroup = NULL;
1663 int r;
1664
1665 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1666 if (r < 0)
1667 return r;
1668
1669 return cg_path_get_owner_uid(cgroup, uid);
1670 }
1671
1672 int cg_path_get_slice(const char *p, char **slice) {
1673 const char *e = NULL;
1674
1675 assert(p);
1676 assert(slice);
1677
1678 /* Finds the right-most slice unit from the beginning, but
1679 * stops before we come to the first non-slice unit. */
1680
1681 for (;;) {
1682 size_t n;
1683
1684 p += strspn(p, "/");
1685
1686 n = strcspn(p, "/");
1687 if (!valid_slice_name(p, n)) {
1688
1689 if (!e) {
1690 char *s;
1691
1692 s = strdup(SPECIAL_ROOT_SLICE);
1693 if (!s)
1694 return -ENOMEM;
1695
1696 *slice = s;
1697 return 0;
1698 }
1699
1700 return cg_path_decode_unit(e, slice);
1701 }
1702
1703 e = p;
1704 p += n;
1705 }
1706 }
1707
1708 int cg_pid_get_slice(pid_t pid, char **slice) {
1709 _cleanup_free_ char *cgroup = NULL;
1710 int r;
1711
1712 assert(slice);
1713
1714 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1715 if (r < 0)
1716 return r;
1717
1718 return cg_path_get_slice(cgroup, slice);
1719 }
1720
/*
 * Extracts the user slice from a cgroup path. Returns -ENXIO if the path
 * carries no user manager/session prefix, otherwise whatever
 * cg_path_get_slice() returns for the remainder.
 */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Past the user prefix the path looks pretty much the same as for a
         * system slice, so the same parser is used from there on. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1734
1735 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1736 _cleanup_free_ char *cgroup = NULL;
1737 int r;
1738
1739 assert(slice);
1740
1741 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1742 if (r < 0)
1743 return r;
1744
1745 return cg_path_get_user_slice(cgroup, slice);
1746 }
1747
1748 char *cg_escape(const char *p) {
1749 bool need_prefix = false;
1750
1751 /* This implements very minimal escaping for names to be used
1752 * as file names in the cgroup tree: any name which might
1753 * conflict with a kernel name or is prefixed with '_' is
1754 * prefixed with a '_'. That way, when reading cgroup names it
1755 * is sufficient to remove a single prefixing underscore if
1756 * there is one. */
1757
1758 /* The return value of this function (unlike cg_unescape())
1759 * needs free()! */
1760
1761 if (p[0] == 0 ||
1762 p[0] == '_' ||
1763 p[0] == '.' ||
1764 streq(p, "notify_on_release") ||
1765 streq(p, "release_agent") ||
1766 streq(p, "tasks") ||
1767 startswith(p, "cgroup."))
1768 need_prefix = true;
1769 else {
1770 const char *dot;
1771
1772 dot = strrchr(p, '.');
1773 if (dot) {
1774 CGroupController c;
1775 size_t l = dot - p;
1776
1777 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1778 const char *n;
1779
1780 n = cgroup_controller_to_string(c);
1781
1782 if (l != strlen(n))
1783 continue;
1784
1785 if (memcmp(p, n, l) != 0)
1786 continue;
1787
1788 need_prefix = true;
1789 break;
1790 }
1791 }
1792 }
1793
1794 if (need_prefix)
1795 return strappend("_", p);
1796
1797 return strdup(p);
1798 }
1799
/*
 * Undoes cg_escape(): skips a single leading underscore, if there is one.
 *
 * The return value of this function (unlike cg_escape()) doesn't need
 * free()! It points into the string that was passed in.
 */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (*p == '_' ? p + 1 : p);
}
1811
1812 #define CONTROLLER_VALID \
1813 DIGITS LETTERS \
1814 "_"
1815
1816 bool cg_controller_is_valid(const char *p) {
1817 const char *t, *s;
1818
1819 if (!p)
1820 return false;
1821
1822 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1823 return true;
1824
1825 s = startswith(p, "name=");
1826 if (s)
1827 p = s;
1828
1829 if (*p == 0 || *p == '_')
1830 return false;
1831
1832 for (t = p; *t; t++)
1833 if (!strchr(CONTROLLER_VALID, *t))
1834 return false;
1835
1836 if (t - p > FILENAME_MAX)
1837 return false;
1838
1839 return true;
1840 }
1841
1842 int cg_slice_to_path(const char *unit, char **ret) {
1843 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1844 const char *dash;
1845 int r;
1846
1847 assert(unit);
1848 assert(ret);
1849
1850 if (streq(unit, SPECIAL_ROOT_SLICE)) {
1851 char *x;
1852
1853 x = strdup("");
1854 if (!x)
1855 return -ENOMEM;
1856 *ret = x;
1857 return 0;
1858 }
1859
1860 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1861 return -EINVAL;
1862
1863 if (!endswith(unit, ".slice"))
1864 return -EINVAL;
1865
1866 r = unit_name_to_prefix(unit, &p);
1867 if (r < 0)
1868 return r;
1869
1870 dash = strchr(p, '-');
1871
1872 /* Don't allow initial dashes */
1873 if (dash == p)
1874 return -EINVAL;
1875
1876 while (dash) {
1877 _cleanup_free_ char *escaped = NULL;
1878 char n[dash - p + sizeof(".slice")];
1879
1880 /* Don't allow trailing or double dashes */
1881 if (dash[1] == 0 || dash[1] == '-')
1882 return -EINVAL;
1883
1884 strcpy(stpncpy(n, p, dash - p), ".slice");
1885 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1886 return -EINVAL;
1887
1888 escaped = cg_escape(n);
1889 if (!escaped)
1890 return -ENOMEM;
1891
1892 if (!strextend(&s, escaped, "/", NULL))
1893 return -ENOMEM;
1894
1895 dash = strchr(dash+1, '-');
1896 }
1897
1898 e = cg_escape(unit);
1899 if (!e)
1900 return -ENOMEM;
1901
1902 if (!strextend(&s, e, NULL))
1903 return -ENOMEM;
1904
1905 *ret = s;
1906 s = NULL;
1907
1908 return 0;
1909 }
1910
1911 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1912 _cleanup_free_ char *p = NULL;
1913 int r;
1914
1915 r = cg_get_path(controller, path, attribute, &p);
1916 if (r < 0)
1917 return r;
1918
1919 return write_string_file(p, value, 0);
1920 }
1921
1922 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1923 _cleanup_free_ char *p = NULL;
1924 int r;
1925
1926 r = cg_get_path(controller, path, attribute, &p);
1927 if (r < 0)
1928 return r;
1929
1930 return read_one_line_file(p, ret);
1931 }
1932
1933 int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
1934 _cleanup_free_ char *filename = NULL, *content = NULL;
1935 char *line, *p;
1936 int i, r;
1937
1938 for (i = 0; keys[i]; i++)
1939 values[i] = NULL;
1940
1941 r = cg_get_path(controller, path, attribute, &filename);
1942 if (r < 0)
1943 return r;
1944
1945 r = read_full_file(filename, &content, NULL);
1946 if (r < 0)
1947 return r;
1948
1949 p = content;
1950 while ((line = strsep(&p, "\n"))) {
1951 char *key;
1952
1953 key = strsep(&line, " ");
1954
1955 for (i = 0; keys[i]; i++) {
1956 if (streq(key, keys[i])) {
1957 values[i] = strdup(line);
1958 break;
1959 }
1960 }
1961 }
1962
1963 for (i = 0; keys[i]; i++) {
1964 if (!values[i]) {
1965 for (i = 0; keys[i]; i++) {
1966 free(values[i]);
1967 values[i] = NULL;
1968 }
1969 return -ENOENT;
1970 }
1971 }
1972
1973 return 0;
1974 }
1975
1976 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1977 CGroupController c;
1978 int r;
1979
1980 /* This one will create a cgroup in our private tree, but also
1981 * duplicate it in the trees specified in mask, and remove it
1982 * in all others */
1983
1984 /* First create the cgroup in our own hierarchy. */
1985 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1986 if (r < 0)
1987 return r;
1988
1989 /* If we are in the unified hierarchy, we are done now */
1990 if (cg_all_unified())
1991 return 0;
1992
1993 /* Otherwise, do the same in the other hierarchies */
1994 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1995 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1996 const char *n;
1997
1998 n = cgroup_controller_to_string(c);
1999
2000 if (mask & bit)
2001 (void) cg_create(n, path);
2002 else if (supported & bit)
2003 (void) cg_trim(n, path, true);
2004 }
2005
2006 return 0;
2007 }
2008
2009 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2010 CGroupController c;
2011 int r;
2012
2013 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
2014 if (r < 0)
2015 return r;
2016
2017 if (cg_all_unified())
2018 return 0;
2019
2020 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2021 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2022 const char *p = NULL;
2023
2024 if (!(supported & bit))
2025 continue;
2026
2027 if (path_callback)
2028 p = path_callback(bit, userdata);
2029
2030 if (!p)
2031 p = path;
2032
2033 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2034 }
2035
2036 return 0;
2037 }
2038
2039 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2040 Iterator i;
2041 void *pidp;
2042 int r = 0;
2043
2044 SET_FOREACH(pidp, pids, i) {
2045 pid_t pid = PTR_TO_PID(pidp);
2046 int q;
2047
2048 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2049 if (q < 0 && r >= 0)
2050 r = q;
2051 }
2052
2053 return r;
2054 }
2055
2056 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2057 CGroupController c;
2058 int r = 0;
2059
2060 if (!path_equal(from, to)) {
2061 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2062 if (r < 0)
2063 return r;
2064 }
2065
2066 if (cg_all_unified())
2067 return r;
2068
2069 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2070 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2071 const char *p = NULL;
2072
2073 if (!(supported & bit))
2074 continue;
2075
2076 if (to_callback)
2077 p = to_callback(bit, userdata);
2078
2079 if (!p)
2080 p = to;
2081
2082 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2083 }
2084
2085 return 0;
2086 }
2087
2088 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2089 CGroupController c;
2090 int r;
2091
2092 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2093 if (r < 0)
2094 return r;
2095
2096 if (cg_all_unified())
2097 return r;
2098
2099 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2100 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2101
2102 if (!(supported & bit))
2103 continue;
2104
2105 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2106 }
2107
2108 return 0;
2109 }
2110
2111 int cg_mask_supported(CGroupMask *ret) {
2112 CGroupMask mask = 0;
2113 int r;
2114
2115 /* Determines the mask of supported cgroup controllers. Only
2116 * includes controllers we can make sense of and that are
2117 * actually accessible. */
2118
2119 if (cg_all_unified()) {
2120 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2121 const char *c;
2122
2123 /* In the unified hierarchy we can read the supported
2124 * and accessible controllers from a the top-level
2125 * cgroup attribute */
2126
2127 r = cg_get_root_path(&root);
2128 if (r < 0)
2129 return r;
2130
2131 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2132 if (r < 0)
2133 return r;
2134
2135 r = read_one_line_file(path, &controllers);
2136 if (r < 0)
2137 return r;
2138
2139 c = controllers;
2140 for (;;) {
2141 _cleanup_free_ char *n = NULL;
2142 CGroupController v;
2143
2144 r = extract_first_word(&c, &n, NULL, 0);
2145 if (r < 0)
2146 return r;
2147 if (r == 0)
2148 break;
2149
2150 v = cgroup_controller_from_string(n);
2151 if (v < 0)
2152 continue;
2153
2154 mask |= CGROUP_CONTROLLER_TO_MASK(v);
2155 }
2156
2157 /* Currently, we support the cpu, memory, io and pids
2158 * controller in the unified hierarchy, mask
2159 * everything else off. */
2160 mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2161
2162 } else {
2163 CGroupController c;
2164
2165 /* In the legacy hierarchy, we check whether which
2166 * hierarchies are mounted. */
2167
2168 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2169 const char *n;
2170
2171 n = cgroup_controller_to_string(c);
2172 if (controller_is_accessible(n) >= 0)
2173 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2174 }
2175 }
2176
2177 *ret = mask;
2178 return 0;
2179 }
2180
2181 int cg_kernel_controllers(Set *controllers) {
2182 _cleanup_fclose_ FILE *f = NULL;
2183 char buf[LINE_MAX];
2184 int r;
2185
2186 assert(controllers);
2187
2188 /* Determines the full list of kernel-known controllers. Might
2189 * include controllers we don't actually support, arbitrary
2190 * named hierarchies and controllers that aren't currently
2191 * accessible (because not mounted). */
2192
2193 f = fopen("/proc/cgroups", "re");
2194 if (!f) {
2195 if (errno == ENOENT)
2196 return 0;
2197 return -errno;
2198 }
2199
2200 /* Ignore the header line */
2201 (void) fgets(buf, sizeof(buf), f);
2202
2203 for (;;) {
2204 char *controller;
2205 int enabled = 0;
2206
2207 errno = 0;
2208 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2209
2210 if (feof(f))
2211 break;
2212
2213 if (ferror(f) && errno > 0)
2214 return -errno;
2215
2216 return -EBADMSG;
2217 }
2218
2219 if (!enabled) {
2220 free(controller);
2221 continue;
2222 }
2223
2224 if (!cg_controller_is_valid(controller)) {
2225 free(controller);
2226 return -EBADMSG;
2227 }
2228
2229 r = set_consume(controllers, controller);
2230 if (r < 0)
2231 return r;
2232 }
2233
2234 return 0;
2235 }
2236
2237 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2238
2239 static int cg_update_unified(void) {
2240
2241 struct statfs fs;
2242
2243 /* Checks if we support the unified hierarchy. Returns an
2244 * error when the cgroup hierarchies aren't mounted yet or we
2245 * have any other trouble determining if the unified hierarchy
2246 * is supported. */
2247
2248 if (unified_cache >= CGROUP_UNIFIED_NONE)
2249 return 0;
2250
2251 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2252 return -errno;
2253
2254 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC))
2255 unified_cache = CGROUP_UNIFIED_ALL;
2256 else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2257 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
2258 return -errno;
2259
2260 unified_cache = F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC) ?
2261 CGROUP_UNIFIED_SYSTEMD : CGROUP_UNIFIED_NONE;
2262 } else
2263 return -ENOMEDIUM;
2264
2265 return 0;
2266 }
2267
2268 bool cg_unified(const char *controller) {
2269
2270 assert(cg_update_unified() >= 0);
2271
2272 if (streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER))
2273 return unified_cache >= CGROUP_UNIFIED_SYSTEMD;
2274 else
2275 return unified_cache >= CGROUP_UNIFIED_ALL;
2276 }
2277
/* Returns the result of cg_unified() for a NULL controller, i.e. whether the
 * cached hierarchy state is at least CGROUP_UNIFIED_ALL. */
bool cg_all_unified(void) {

        return cg_unified(NULL);
}
2282
/* Drops the cached hierarchy state and determines it anew. Returns the
 * result of cg_update_unified(), i.e. a negative value if detection fails. */
int cg_unified_flush(void) {
        /* Reset first, so that cg_update_unified() doesn't short-cut on the
         * cached value */
        unified_cache = CGROUP_UNIFIED_UNKNOWN;

        return cg_update_unified();
}
2288
2289 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2290 _cleanup_free_ char *fs = NULL;
2291 CGroupController c;
2292 int r;
2293
2294 assert(p);
2295
2296 if (supported == 0)
2297 return 0;
2298
2299 if (!cg_all_unified()) /* on the legacy hiearchy there's no joining of controllers defined */
2300 return 0;
2301
2302 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2303 if (r < 0)
2304 return r;
2305
2306 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2307 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2308 const char *n;
2309
2310 if (!(supported & bit))
2311 continue;
2312
2313 n = cgroup_controller_to_string(c);
2314 {
2315 char s[1 + strlen(n) + 1];
2316
2317 s[0] = mask & bit ? '+' : '-';
2318 strcpy(s + 1, n);
2319
2320 r = write_string_file(fs, s, 0);
2321 if (r < 0)
2322 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2323 }
2324 }
2325
2326 return 0;
2327 }
2328
2329 bool cg_is_unified_wanted(void) {
2330 static thread_local int wanted = -1;
2331 int r;
2332 bool b;
2333
2334 /* If the hierarchy is already mounted, then follow whatever
2335 * was chosen for it. */
2336 if (cg_unified_flush() >= 0)
2337 return cg_all_unified();
2338
2339 /* Otherwise, let's see what the kernel command line has to
2340 * say. Since checking that is expensive, let's cache the
2341 * result. */
2342 if (wanted >= 0)
2343 return wanted;
2344
2345 r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
2346 if (r < 0)
2347 return false;
2348
2349 return (wanted = r > 0 ? b : false);
2350 }
2351
/* Returns the logical negation of cg_is_unified_wanted(). */
bool cg_is_legacy_wanted(void) {
        return !cg_is_unified_wanted();
}
2355
2356 bool cg_is_unified_systemd_controller_wanted(void) {
2357 static thread_local int wanted = -1;
2358 int r;
2359 bool b;
2360
2361 /* If the unified hierarchy is requested in full, no need to
2362 * bother with this. */
2363 if (cg_is_unified_wanted())
2364 return 0;
2365
2366 /* If the hierarchy is already mounted, then follow whatever
2367 * was chosen for it. */
2368 if (cg_unified_flush() >= 0)
2369 return cg_unified(SYSTEMD_CGROUP_CONTROLLER);
2370
2371 /* Otherwise, let's see what the kernel command line has to
2372 * say. Since checking that is expensive, let's cache the
2373 * result. */
2374 if (wanted >= 0)
2375 return wanted;
2376
2377 r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
2378 if (r < 0)
2379 return false;
2380
2381 return (wanted = r > 0 ? b : false);
2382 }
2383
/* Returns true only if the legacy hierarchy is wanted and our own hierarchy
 * is not to be unified either. */
bool cg_is_legacy_systemd_controller_wanted(void) {
        if (!cg_is_legacy_wanted())
                return false;

        return !cg_is_unified_systemd_controller_wanted();
}
2387
2388 int cg_weight_parse(const char *s, uint64_t *ret) {
2389 uint64_t u;
2390 int r;
2391
2392 if (isempty(s)) {
2393 *ret = CGROUP_WEIGHT_INVALID;
2394 return 0;
2395 }
2396
2397 r = safe_atou64(s, &u);
2398 if (r < 0)
2399 return r;
2400
2401 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2402 return -ERANGE;
2403
2404 *ret = u;
2405 return 0;
2406 }
2407
/* Default value for each IO limit type: every one of them defaults to
 * CGROUP_LIMIT_MAX. */
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]    = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WBPS_MAX]    = CGROUP_LIMIT_MAX,
        [CGROUP_IO_RIOPS_MAX]   = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WIOPS_MAX]   = CGROUP_LIMIT_MAX,
};

/* String name of each IO limit type */
static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]    = "IOReadBandwidthMax",
        [CGROUP_IO_WBPS_MAX]    = "IOWriteBandwidthMax",
        [CGROUP_IO_RIOPS_MAX]   = "IOReadIOPSMax",
        [CGROUP_IO_WIOPS_MAX]   = "IOWriteIOPSMax",
};

/* NOTE(review): presumably generates the cgroup_io_limit_type_to_string() and
 * cgroup_io_limit_type_from_string() helpers from the table above; confirm
 * against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2423
2424 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2425 uint64_t u;
2426 int r;
2427
2428 if (isempty(s)) {
2429 *ret = CGROUP_CPU_SHARES_INVALID;
2430 return 0;
2431 }
2432
2433 r = safe_atou64(s, &u);
2434 if (r < 0)
2435 return r;
2436
2437 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2438 return -ERANGE;
2439
2440 *ret = u;
2441 return 0;
2442 }
2443
2444 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2445 uint64_t u;
2446 int r;
2447
2448 if (isempty(s)) {
2449 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2450 return 0;
2451 }
2452
2453 r = safe_atou64(s, &u);
2454 if (r < 0)
2455 return r;
2456
2457 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2458 return -ERANGE;
2459
2460 *ret = u;
2461 return 0;
2462 }
2463
2464 bool is_cgroup_fs(const struct statfs *s) {
2465 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2466 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2467 }
2468
/*
 * Returns true if the file descriptor refers to something on a cgroup or
 * cgroup2 file system. Returns false if that's not the case or if the file
 * system type cannot be determined.
 */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        /* The return type is bool: a negative errno would be converted to
         * true and wrongly report a cgroup file system, hence failure is
         * mapped to false. */
        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2477
2478 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2479 [CGROUP_CONTROLLER_CPU] = "cpu",
2480 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2481 [CGROUP_CONTROLLER_IO] = "io",
2482 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2483 [CGROUP_CONTROLLER_MEMORY] = "memory",
2484 [CGROUP_CONTROLLER_DEVICES] = "devices",
2485 [CGROUP_CONTROLLER_PIDS] = "pids",
2486 };
2487
2488 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);