]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/cgroup-util.c
core: general cgroup rework
[thirdparty/systemd.git] / src / shared / cgroup-util.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <unistd.h>
24 #include <signal.h>
25 #include <string.h>
26 #include <stdlib.h>
27 #include <dirent.h>
28 #include <sys/stat.h>
29 #include <sys/types.h>
30 #include <ftw.h>
31
32 #include "cgroup-util.h"
33 #include "log.h"
34 #include "set.h"
35 #include "macro.h"
36 #include "util.h"
37 #include "path-util.h"
38 #include "strv.h"
39 #include "unit-name.h"
40 #include "fileio.h"
41 #include "special.h"
42
43 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
44 _cleanup_free_ char *fs = NULL;
45 FILE *f;
46 int r;
47
48 assert(_f);
49
50 r = cg_get_path(controller, path, "cgroup.procs", &fs);
51 if (r < 0)
52 return r;
53
54 f = fopen(fs, "re");
55 if (!f)
56 return -errno;
57
58 *_f = f;
59 return 0;
60 }
61
/* Reads the next PID from a stream opened on a process list.
 *
 * Returns 1 and stores the PID in *_pid when one was read, 0 when the
 * end of the stream was reached, and a negative errno-style value on
 * failure (-EIO if nothing more specific is known, or if the value
 * read is zero).
 *
 * Note that cgroup.procs may list the same PID more than once; see
 * cgroups.txt for details. */
int cg_read_pid(FILE *f, pid_t *_pid) {
        unsigned long value;
        int n_parsed;

        assert(f);
        assert(_pid);

        errno = 0;
        n_parsed = fscanf(f, "%lu", &value);
        if (n_parsed != 1) {

                if (feof(f))
                        return 0;

                if (errno)
                        return -errno;

                return -EIO;
        }

        /* A PID of zero is never valid */
        if (value == 0)
                return -EIO;

        *_pid = (pid_t) value;
        return 1;
}
86
87 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
88 _cleanup_free_ char *fs = NULL;
89 int r;
90 DIR *d;
91
92 assert(_d);
93
94 /* This is not recursive! */
95
96 r = cg_get_path(controller, path, NULL, &fs);
97 if (r < 0)
98 return r;
99
100 d = opendir(fs);
101 if (!d)
102 return -errno;
103
104 *_d = d;
105 return 0;
106 }
107
108 int cg_read_subgroup(DIR *d, char **fn) {
109 struct dirent *de;
110
111 assert(d);
112 assert(fn);
113
114 FOREACH_DIRENT(de, d, return -errno) {
115 char *b;
116
117 if (de->d_type != DT_DIR)
118 continue;
119
120 if (streq(de->d_name, ".") ||
121 streq(de->d_name, ".."))
122 continue;
123
124 b = strdup(de->d_name);
125 if (!b)
126 return -ENOMEM;
127
128 *fn = b;
129 return 1;
130 }
131
132 return 0;
133 }
134
135 int cg_rmdir(const char *controller, const char *path) {
136 _cleanup_free_ char *p = NULL;
137 int r;
138
139 r = cg_get_path(controller, path, NULL, &p);
140 if (r < 0)
141 return r;
142
143 r = rmdir(p);
144 if (r < 0 && errno != ENOENT)
145 return -errno;
146
147 return 0;
148 }
149
150 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
151 _cleanup_set_free_ Set *allocated_set = NULL;
152 bool done = false;
153 int r, ret = 0;
154 pid_t my_pid;
155
156 assert(sig >= 0);
157
158 /* This goes through the tasks list and kills them all. This
159 * is repeated until no further processes are added to the
160 * tasks list, to properly handle forking processes */
161
162 if (!s) {
163 s = allocated_set = set_new(trivial_hash_func, trivial_compare_func);
164 if (!s)
165 return -ENOMEM;
166 }
167
168 my_pid = getpid();
169
170 do {
171 _cleanup_fclose_ FILE *f = NULL;
172 pid_t pid = 0;
173 done = true;
174
175 r = cg_enumerate_processes(controller, path, &f);
176 if (r < 0) {
177 if (ret >= 0 && r != -ENOENT)
178 return r;
179
180 return ret;
181 }
182
183 while ((r = cg_read_pid(f, &pid)) > 0) {
184
185 if (ignore_self && pid == my_pid)
186 continue;
187
188 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
189 continue;
190
191 /* If we haven't killed this process yet, kill
192 * it */
193 if (kill(pid, sig) < 0) {
194 if (ret >= 0 && errno != ESRCH)
195 ret = -errno;
196 } else if (ret == 0) {
197
198 if (sigcont)
199 kill(pid, SIGCONT);
200
201 ret = 1;
202 }
203
204 done = false;
205
206 r = set_put(s, LONG_TO_PTR(pid));
207 if (r < 0) {
208 if (ret >= 0)
209 return r;
210
211 return ret;
212 }
213 }
214
215 if (r < 0) {
216 if (ret >= 0)
217 return r;
218
219 return ret;
220 }
221
222 /* To avoid racing against processes which fork
223 * quicker than we can kill them we repeat this until
224 * no new pids need to be killed. */
225
226 } while (!done);
227
228 return ret;
229 }
230
231 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
232 _cleanup_set_free_ Set *allocated_set = NULL;
233 _cleanup_closedir_ DIR *d = NULL;
234 int r, ret = 0;
235 char *fn;
236
237 assert(path);
238 assert(sig >= 0);
239
240 if (!s) {
241 s = allocated_set = set_new(trivial_hash_func, trivial_compare_func);
242 if (!s)
243 return -ENOMEM;
244 }
245
246 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
247
248 r = cg_enumerate_subgroups(controller, path, &d);
249 if (r < 0) {
250 if (ret >= 0 && r != -ENOENT)
251 return r;
252
253 return ret;
254 }
255
256 while ((r = cg_read_subgroup(d, &fn)) > 0) {
257 _cleanup_free_ char *p = NULL;
258
259 p = strjoin(path, "/", fn, NULL);
260 free(fn);
261 if (!p)
262 return -ENOMEM;
263
264 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
265 if (ret >= 0 && r != 0)
266 ret = r;
267 }
268
269 if (ret >= 0 && r < 0)
270 ret = r;
271
272 if (rem) {
273 r = cg_rmdir(controller, path);
274 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
275 return r;
276 }
277
278 return ret;
279 }
280
281 int cg_kill_recursive_and_wait(const char *controller, const char *path, bool rem) {
282 unsigned i;
283
284 assert(path);
285
286 /* This safely kills all processes; first it sends a SIGTERM,
287 * then checks 8 times after 200ms whether the group is now
288 * empty, then kills everything that is left with SIGKILL and
289 * finally checks 5 times after 200ms each whether the group
290 * is finally empty. */
291
292 for (i = 0; i < 15; i++) {
293 int sig, r;
294
295 if (i <= 0)
296 sig = SIGTERM;
297 else if (i == 9)
298 sig = SIGKILL;
299 else
300 sig = 0;
301
302 r = cg_kill_recursive(controller, path, sig, true, true, rem, NULL);
303 if (r <= 0)
304 return r;
305
306 usleep(200 * USEC_PER_MSEC);
307 }
308
309 return 0;
310 }
311
312 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
313 bool done = false;
314 _cleanup_set_free_ Set *s = NULL;
315 int r, ret = 0;
316 pid_t my_pid;
317
318 assert(cfrom);
319 assert(pfrom);
320 assert(cto);
321 assert(pto);
322
323 s = set_new(trivial_hash_func, trivial_compare_func);
324 if (!s)
325 return -ENOMEM;
326
327 my_pid = getpid();
328
329 do {
330 _cleanup_fclose_ FILE *f = NULL;
331 pid_t pid = 0;
332 done = true;
333
334 r = cg_enumerate_processes(cfrom, pfrom, &f);
335 if (r < 0) {
336 if (ret >= 0 && r != -ENOENT)
337 return r;
338
339 return ret;
340 }
341
342 while ((r = cg_read_pid(f, &pid)) > 0) {
343
344 /* This might do weird stuff if we aren't a
345 * single-threaded program. However, we
346 * luckily know we are not */
347 if (ignore_self && pid == my_pid)
348 continue;
349
350 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
351 continue;
352
353 r = cg_attach(cto, pto, pid);
354 if (r < 0) {
355 if (ret >= 0 && r != -ESRCH)
356 ret = r;
357 } else if (ret == 0)
358 ret = 1;
359
360 done = false;
361
362 r = set_put(s, LONG_TO_PTR(pid));
363 if (r < 0) {
364 if (ret >= 0)
365 return r;
366
367 return ret;
368 }
369 }
370
371 if (r < 0) {
372 if (ret >= 0)
373 return r;
374
375 return ret;
376 }
377 } while (!done);
378
379 return ret;
380 }
381
382 int cg_migrate_recursive(
383 const char *cfrom,
384 const char *pfrom,
385 const char *cto,
386 const char *pto,
387 bool ignore_self,
388 bool rem) {
389
390 _cleanup_closedir_ DIR *d = NULL;
391 int r, ret = 0;
392 char *fn;
393
394 assert(cfrom);
395 assert(pfrom);
396 assert(cto);
397 assert(pto);
398
399 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
400
401 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
402 if (r < 0) {
403 if (ret >= 0 && r != -ENOENT)
404 return r;
405
406 return ret;
407 }
408
409 while ((r = cg_read_subgroup(d, &fn)) > 0) {
410 _cleanup_free_ char *p = NULL;
411
412 p = strjoin(pfrom, "/", fn, NULL);
413 free(fn);
414 if (!p) {
415 if (ret >= 0)
416 return -ENOMEM;
417
418 return ret;
419 }
420
421 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
422 if (r != 0 && ret >= 0)
423 ret = r;
424 }
425
426 if (r < 0 && ret >= 0)
427 ret = r;
428
429 if (rem) {
430 r = cg_rmdir(cfrom, pfrom);
431 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
432 return r;
433 }
434
435 return ret;
436 }
437
438 static const char *normalize_controller(const char *controller) {
439
440 assert(controller);
441
442 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
443 return "systemd";
444 else if (startswith(controller, "name="))
445 return controller + 5;
446 else
447 return controller;
448 }
449
/* Builds a file system path from its parts.
 *
 * With a controller the result is "/sys/fs/cgroup/<controller>"
 * followed by the non-empty ones of path and suffix. Without a
 * controller, path is required (-EINVAL otherwise) and suffix is
 * appended if non-empty. Duplicate slashes are removed from the
 * result. On success the newly allocated string is stored in *fs and
 * 0 is returned; -ENOMEM is returned on allocation failure. */
static int join_path(const char *controller, const char *path, const char *suffix, char **fs) {
        bool have_path = !isempty(path);
        bool have_suffix = !isempty(suffix);
        char *joined;

        if (isempty(controller)) {
                if (!have_path)
                        return -EINVAL;

                if (have_suffix)
                        joined = strjoin(path, "/", suffix, NULL);
                else
                        joined = strdup(path);

        } else if (have_path && have_suffix)
                joined = strjoin("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL);
        else if (have_path)
                joined = strjoin("/sys/fs/cgroup/", controller, "/", path, NULL);
        else if (have_suffix)
                joined = strjoin("/sys/fs/cgroup/", controller, "/", suffix, NULL);
        else
                joined = strappend("/sys/fs/cgroup/", controller);

        if (!joined)
                return -ENOMEM;

        path_kill_slashes(joined);

        *fs = joined;
        return 0;
}
479
480 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
481 const char *p;
482 static __thread bool good = false;
483
484 assert(fs);
485
486 if (controller && !cg_controller_is_valid(controller, true))
487 return -EINVAL;
488
489 if (_unlikely_(!good)) {
490 int r;
491
492 r = path_is_mount_point("/sys/fs/cgroup", false);
493 if (r <= 0)
494 return r < 0 ? r : -ENOENT;
495
496 /* Cache this to save a few stat()s */
497 good = true;
498 }
499
500 p = controller ? normalize_controller(controller) : NULL;
501
502 return join_path(p, path, suffix, fs);
503 }
504
/* Checks whether "/sys/fs/cgroup/<p>" exists. Returns 0 if it does
 * and a negative errno-style value if access() fails. */
static int check_hierarchy(const char *p) {
        static const char prefix[] = "/sys/fs/cgroup/";
        char *full;

        assert(p);

        /* sizeof(prefix) already accounts for the trailing NUL */
        full = alloca(sizeof(prefix) + strlen(p));
        strcpy(full, prefix);
        strcat(full, p);

        if (access(full, F_OK) < 0)
                return -errno;

        return 0;
}
518
519 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
520 const char *p;
521 int r;
522
523 assert(fs);
524
525 if (!cg_controller_is_valid(controller, true))
526 return -EINVAL;
527
528 /* Normalize the controller syntax */
529 p = normalize_controller(controller);
530
531 /* Check if this controller actually really exists */
532 r = check_hierarchy(p);
533 if (r < 0)
534 return r;
535
536 return join_path(p, path, suffix, fs);
537 }
538
539 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
540 assert(path);
541 assert(sb);
542 assert(ftwbuf);
543
544 if (typeflag != FTW_DP)
545 return 0;
546
547 if (ftwbuf->level < 1)
548 return 0;
549
550 rmdir(path);
551 return 0;
552 }
553
554 int cg_trim(const char *controller, const char *path, bool delete_root) {
555 _cleanup_free_ char *fs = NULL;
556 int r = 0;
557
558 assert(path);
559
560 r = cg_get_path(controller, path, NULL, &fs);
561 if (r < 0)
562 return r;
563
564 errno = 0;
565 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0)
566 r = errno ? -errno : -EIO;
567
568 if (delete_root) {
569 if (rmdir(fs) < 0 && errno != ENOENT)
570 return -errno;
571 }
572
573 return r;
574 }
575
576 int cg_delete(const char *controller, const char *path) {
577 _cleanup_free_ char *parent = NULL;
578 int r;
579
580 assert(path);
581
582 r = path_get_parent(path, &parent);
583 if (r < 0)
584 return r;
585
586 r = cg_migrate_recursive(controller, path, controller, parent, false, true);
587 return r == -ENOENT ? 0 : r;
588 }
589
590 int cg_attach(const char *controller, const char *path, pid_t pid) {
591 _cleanup_free_ char *fs = NULL;
592 char c[DECIMAL_STR_MAX(pid_t) + 2];
593 int r;
594
595 assert(path);
596 assert(pid >= 0);
597
598 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
599 if (r < 0)
600 return r;
601
602 if (pid == 0)
603 pid = getpid();
604
605 snprintf(c, sizeof(c), "%lu\n", (unsigned long) pid);
606
607 return write_string_file(fs, c);
608 }
609
610 int cg_set_group_access(
611 const char *controller,
612 const char *path,
613 mode_t mode,
614 uid_t uid,
615 gid_t gid) {
616
617 _cleanup_free_ char *fs = NULL;
618 int r;
619
620 assert(path);
621
622 if (mode != (mode_t) -1)
623 mode &= 0777;
624
625 r = cg_get_path(controller, path, NULL, &fs);
626 if (r < 0)
627 return r;
628
629 return chmod_and_chown(fs, mode, uid, gid);
630 }
631
632 int cg_set_task_access(
633 const char *controller,
634 const char *path,
635 mode_t mode,
636 uid_t uid,
637 gid_t gid) {
638
639 _cleanup_free_ char *fs = NULL, *procs = NULL;
640 int r;
641
642 assert(path);
643
644 if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1)
645 return 0;
646
647 if (mode != (mode_t) -1)
648 mode &= 0666;
649
650 r = cg_get_path(controller, path, "cgroup.procs", &fs);
651 if (r < 0)
652 return r;
653
654 r = chmod_and_chown(fs, mode, uid, gid);
655 if (r < 0)
656 return r;
657
658 /* Compatibility, Always keep values for "tasks" in sync with
659 * "cgroup.procs" */
660 r = cg_get_path(controller, path, "tasks", &procs);
661 if (r < 0)
662 return r;
663
664 return chmod_and_chown(procs, mode, uid, gid);
665 }
666
667 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
668 _cleanup_fclose_ FILE *f = NULL;
669 char line[LINE_MAX];
670 const char *fs;
671 size_t cs;
672
673 assert(path);
674 assert(pid >= 0);
675
676 if (controller) {
677 if (!cg_controller_is_valid(controller, true))
678 return -EINVAL;
679
680 controller = normalize_controller(controller);
681 } else
682 controller = SYSTEMD_CGROUP_CONTROLLER;
683
684 if (pid == 0)
685 fs = "/proc/self/cgroup";
686 else
687 fs = procfs_file_alloca(pid, "cgroup");
688
689 f = fopen(fs, "re");
690 if (!f)
691 return errno == ENOENT ? -ESRCH : -errno;
692
693 cs = strlen(controller);
694
695 FOREACH_LINE(line, f, return -errno) {
696 char *l, *p, *w, *e;
697 size_t k;
698 char *state;
699 bool found = false;
700
701 truncate_nl(line);
702
703 l = strchr(line, ':');
704 if (!l)
705 continue;
706
707 l++;
708 e = strchr(l, ':');
709 if (!e)
710 continue;
711
712 *e = 0;
713
714 FOREACH_WORD_SEPARATOR(w, k, l, ",", state) {
715
716 if (k == cs && memcmp(w, controller, cs) == 0) {
717 found = true;
718 break;
719 }
720
721 if (k == 5 + cs &&
722 memcmp(w, "name=", 5) == 0 &&
723 memcmp(w+5, controller, cs) == 0) {
724 found = true;
725 break;
726 }
727 }
728
729 if (!found)
730 continue;
731
732 p = strdup(e + 1);
733 if (!p)
734 return -ENOMEM;
735
736 *path = p;
737 return 0;
738 }
739
740 return -ENOENT;
741 }
742
743 int cg_install_release_agent(const char *controller, const char *agent) {
744 _cleanup_free_ char *fs = NULL, *contents = NULL;
745 char *sc;
746 int r;
747
748 assert(agent);
749
750 r = cg_get_path(controller, NULL, "release_agent", &fs);
751 if (r < 0)
752 return r;
753
754 r = read_one_line_file(fs, &contents);
755 if (r < 0)
756 return r;
757
758 sc = strstrip(contents);
759 if (sc[0] == 0) {
760 r = write_string_file(fs, agent);
761 if (r < 0)
762 return r;
763 } else if (!streq(sc, agent))
764 return -EEXIST;
765
766 free(fs);
767 fs = NULL;
768 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
769 if (r < 0)
770 return r;
771
772 free(contents);
773 contents = NULL;
774 r = read_one_line_file(fs, &contents);
775 if (r < 0)
776 return r;
777
778 sc = strstrip(contents);
779 if (streq(sc, "0")) {
780 r = write_string_file(fs, "1");
781 if (r < 0)
782 return r;
783
784 return 1;
785 }
786
787 if (!streq(sc, "1"))
788 return -EIO;
789
790 return 0;
791 }
792
793 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
794 _cleanup_fclose_ FILE *f = NULL;
795 pid_t pid = 0, self_pid;
796 bool found = false;
797 int r;
798
799 assert(path);
800
801 r = cg_enumerate_processes(controller, path, &f);
802 if (r < 0)
803 return r == -ENOENT ? 1 : r;
804
805 self_pid = getpid();
806
807 while ((r = cg_read_pid(f, &pid)) > 0) {
808
809 if (ignore_self && pid == self_pid)
810 continue;
811
812 found = true;
813 break;
814 }
815
816 if (r < 0)
817 return r;
818
819 return !found;
820 }
821
822 int cg_is_empty_by_spec(const char *spec, bool ignore_self) {
823 _cleanup_free_ char *controller = NULL, *path = NULL;
824 int r;
825
826 assert(spec);
827
828 r = cg_split_spec(spec, &controller, &path);
829 if (r < 0)
830 return r;
831
832 return cg_is_empty(controller, path, ignore_self);
833 }
834
835 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
836 _cleanup_closedir_ DIR *d = NULL;
837 char *fn;
838 int r;
839
840 assert(path);
841
842 r = cg_is_empty(controller, path, ignore_self);
843 if (r <= 0)
844 return r;
845
846 r = cg_enumerate_subgroups(controller, path, &d);
847 if (r < 0)
848 return r == -ENOENT ? 1 : r;
849
850 while ((r = cg_read_subgroup(d, &fn)) > 0) {
851 _cleanup_free_ char *p = NULL;
852
853 p = strjoin(path, "/", fn, NULL);
854 free(fn);
855 if (!p)
856 return -ENOMEM;
857
858 r = cg_is_empty_recursive(controller, p, ignore_self);
859 if (r <= 0)
860 return r;
861 }
862
863 if (r < 0)
864 return r;
865
866 return 1;
867 }
868
869 int cg_split_spec(const char *spec, char **controller, char **path) {
870 const char *e;
871 char *t = NULL, *u = NULL;
872 _cleanup_free_ char *v = NULL;
873
874 assert(spec);
875
876 if (*spec == '/') {
877 if (!path_is_safe(spec))
878 return -EINVAL;
879
880 if (path) {
881 t = strdup(spec);
882 if (!t)
883 return -ENOMEM;
884
885 path_kill_slashes(t);
886 *path = t;
887 }
888
889 if (controller)
890 *controller = NULL;
891
892 return 0;
893 }
894
895 e = strchr(spec, ':');
896 if (!e) {
897 if (!cg_controller_is_valid(spec, true))
898 return -EINVAL;
899
900 if (controller) {
901 t = strdup(normalize_controller(spec));
902 if (!t)
903 return -ENOMEM;
904
905 *controller = t;
906 }
907
908 if (path)
909 *path = NULL;
910
911 return 0;
912 }
913
914 v = strndup(spec, e-spec);
915 if (!v)
916 return -ENOMEM;
917 t = strdup(normalize_controller(v));
918 if (!t)
919 return -ENOMEM;
920 if (!cg_controller_is_valid(t, true)) {
921 free(t);
922 return -EINVAL;
923 }
924
925 u = strdup(e+1);
926 if (!u) {
927 free(t);
928 return -ENOMEM;
929 }
930 if (!path_is_safe(u) ||
931 !path_is_absolute(u)) {
932 free(t);
933 free(u);
934 return -EINVAL;
935 }
936
937 path_kill_slashes(u);
938
939 if (controller)
940 *controller = t;
941 else
942 free(t);
943
944 if (path)
945 *path = u;
946 else
947 free(u);
948
949 return 0;
950 }
951
952 int cg_join_spec(const char *controller, const char *path, char **spec) {
953 char *s;
954
955 assert(path);
956
957 if (!controller)
958 controller = "systemd";
959 else {
960 if (!cg_controller_is_valid(controller, true))
961 return -EINVAL;
962
963 controller = normalize_controller(controller);
964 }
965
966 if (!path_is_absolute(path))
967 return -EINVAL;
968
969 s = strjoin(controller, ":", path, NULL);
970 if (!s)
971 return -ENOMEM;
972
973 path_kill_slashes(s + strlen(controller) + 1);
974
975 *spec = s;
976 return 0;
977 }
978
979 int cg_mangle_path(const char *path, char **result) {
980 _cleanup_free_ char *c = NULL, *p = NULL;
981 char *t;
982 int r;
983
984 assert(path);
985 assert(result);
986
987 /* First check if it already is a filesystem path */
988 if (path_startswith(path, "/sys/fs/cgroup")) {
989
990 t = strdup(path);
991 if (!t)
992 return -ENOMEM;
993
994 path_kill_slashes(t);
995 *result = t;
996 return 0;
997 }
998
999 /* Otherwise treat it as cg spec */
1000 r = cg_split_spec(path, &c, &p);
1001 if (r < 0)
1002 return r;
1003
1004 return cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
1005 }
1006
1007 int cg_get_root_path(char **path) {
1008 char *p, *e;
1009 int r;
1010
1011 assert(path);
1012
1013 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1014 if (r < 0)
1015 return r;
1016
1017 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE);
1018 if (e)
1019 *e = 0;
1020
1021 *path = p;
1022 return 0;
1023 }
1024
1025 char **cg_shorten_controllers(char **controllers) {
1026 char **f, **t;
1027
1028 if (!controllers)
1029 return controllers;
1030
1031 for (f = controllers, t = controllers; *f; f++) {
1032 const char *p;
1033 int r;
1034
1035 p = normalize_controller(*f);
1036
1037 if (streq(p, "systemd")) {
1038 free(*f);
1039 continue;
1040 }
1041
1042 if (!cg_controller_is_valid(p, true)) {
1043 log_warning("Controller %s is not valid, removing from controllers list.", p);
1044 free(*f);
1045 continue;
1046 }
1047
1048 r = check_hierarchy(p);
1049 if (r < 0) {
1050 log_debug("Controller %s is not available, removing from controllers list.", p);
1051 free(*f);
1052 continue;
1053 }
1054
1055 *(t++) = *f;
1056 }
1057
1058 *t = NULL;
1059 return strv_uniq(controllers);
1060 }
1061
1062 int cg_pid_get_path_shifted(pid_t pid, char **root, char **cgroup) {
1063 _cleanup_free_ char *cg_root = NULL;
1064 char *cg_process, *p;
1065 int r;
1066
1067 r = cg_get_root_path(&cg_root);
1068 if (r < 0)
1069 return r;
1070
1071 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cg_process);
1072 if (r < 0)
1073 return r;
1074
1075 p = path_startswith(cg_process, cg_root);
1076 if (p)
1077 p--;
1078 else
1079 p = cg_process;
1080
1081 if (cgroup) {
1082 char* c;
1083
1084 c = strdup(p);
1085 if (!c) {
1086 free(cg_process);
1087 return -ENOMEM;
1088 }
1089
1090 *cgroup = c;
1091 }
1092
1093 if (root) {
1094 cg_process[p-cg_process] = 0;
1095 *root = cg_process;
1096 } else
1097 free(cg_process);
1098
1099 return 0;
1100 }
1101
/* Extracts a unit name from the beginning of a cgroup path.
 *
 * The first path component is unescaped and must be a valid unit
 * name (templates allowed). If it is a template, the following
 * component is used instead and must be a valid non-template unit
 * name. On success a newly allocated copy is stored in *unit and 0
 * is returned; -EINVAL if no valid name is found, -ENOMEM on
 * allocation failure. */
int cg_path_decode_unit(const char *cgroup, char **unit){
        char *end, *name, *copy;

        assert(cgroup);
        assert(unit);

        end = strchrnul(cgroup, '/');
        name = strndupa(cgroup, end - cgroup);
        name = cg_unescape(name);

        /* Could this be a valid unit name? */
        if (!unit_name_is_valid(name, true))
                return -EINVAL;

        if (unit_name_is_template(name)) {
                char *next_end, *instance;

                /* A template needs an instance component after it */
                if (*end != '/')
                        return -EINVAL;

                end += strspn(end, "/");

                next_end = strchrnul(end, '/');
                instance = strndupa(end, next_end - end);
                instance = cg_unescape(instance);

                if (!unit_name_is_valid(instance, false))
                        return -EINVAL;

                name = instance;
        }

        copy = strdup(name);
        if (!copy)
                return -ENOMEM;

        *unit = copy;
        return 0;
}
1140
/* Skips over all leading path components that end in ".slice" (and
 * are longer than that suffix) and returns a pointer to the first
 * component that does not, with leading slashes removed. */
static const char *skip_slices(const char *p) {
        size_t len;

        for (;;) {
                p += strspn(p, "/");

                len = strcspn(p, "/");
                if (len <= 6)
                        return p;

                if (memcmp(p + len - 6, ".slice", 6) != 0)
                        return p;

                p += len;
        }
}
1156
/* Determines the unit a cgroup path belongs to: leading slice
 * components are skipped and the rest is handed to
 * cg_path_decode_unit(). */
int cg_path_get_unit(const char *path, char **unit) {
        assert(path);
        assert(unit);

        return cg_path_decode_unit(skip_slices(path), unit);
}
1167
1168 int cg_pid_get_unit(pid_t pid, char **unit) {
1169 _cleanup_free_ char *cgroup = NULL;
1170 int r;
1171
1172 assert(unit);
1173
1174 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1175 if (r < 0)
1176 return r;
1177
1178 return cg_path_get_unit(cgroup, unit);
1179 }
1180
/* Skips a leading "<something>.user" path component, if there is
 * one, together with the slashes around it. If the first component
 * does not end in ".user" (or consists of nothing else), only the
 * leading slashes are skipped. */
static const char *skip_user(const char *p) {
        size_t len;

        assert(p);

        p += strspn(p, "/");
        len = strcspn(p, "/");

        if (len > 5 && memcmp(p + len - 5, ".user", 5) == 0) {
                p += len;
                p += strspn(p, "/");
        }

        return p;
}
1197
/* Skips a leading "<something>.session" path component together with
 * the slashes around it. Unlike skip_user() the component is
 * mandatory: NULL is returned if the first component does not end in
 * ".session" or consists of nothing else. */
static const char *skip_session(const char *p) {
        size_t len;

        assert(p);

        p += strspn(p, "/");
        len = strcspn(p, "/");

        if (len <= 8)
                return NULL;

        if (memcmp(p + len - 8, ".session", 8) != 0)
                return NULL;

        p += len;
        return p + strspn(p, "/");
}
1214
/* Skips a leading path component that starts with "systemd-", if
 * there is one, together with the slashes around it. Otherwise only
 * the leading slashes are skipped. */
static const char *skip_systemd_label(const char *p) {
        size_t len;

        assert(p);

        p += strspn(p, "/");
        len = strcspn(p, "/");

        if (len >= 8 && memcmp(p, "systemd-", 8) == 0) {
                p += len;
                p += strspn(p, "/");
        }

        return p;
}
1231
/* Determines the user unit a cgroup path belongs to.
 *
 * The path is always parsed from its start, since unit cgroups may
 * contain arbitrary child cgroups that must not confuse us. Skipped
 * in order: optional slices, an optional user component, a mandatory
 * session component (-ENOENT if missing), and an optional
 * "systemd-" component. What is left is decoded as a unit name. */
int cg_path_get_user_unit(const char *path, char **unit) {
        const char *rest;

        assert(path);
        assert(unit);

        rest = skip_session(skip_user(skip_slices(path)));
        if (!rest)
                return -ENOENT;

        return cg_path_decode_unit(skip_systemd_label(rest), unit);
}
1258
1259 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1260 _cleanup_free_ char *cgroup = NULL;
1261 int r;
1262
1263 assert(unit);
1264
1265 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1266 if (r < 0)
1267 return r;
1268
1269 return cg_path_get_user_unit(cgroup, unit);
1270 }
1271
/* Extracts a machine name from a cgroup path. After skipping leading
 * slices, the next component is unescaped and must end in
 * ".machine"; the part in front of that suffix is stored, newly
 * allocated, in *machine. Returns -ENOENT if there is no such
 * component and -ENOMEM on allocation failure. */
int cg_path_get_machine_name(const char *path, char **machine) {
        const char *start, *end, *suffix;
        char *component, *name;

        assert(path);
        assert(machine);

        /* Slices in front are not interesting */
        start = skip_slices(path);

        end = strchrnul(start, '/');
        if (end == start)
                return -ENOENT;

        component = strndupa(start, end - start);
        component = cg_unescape(component);

        suffix = endswith(component, ".machine");
        if (!suffix)
                return -ENOENT;

        name = strndup(component, suffix - component);
        if (!name)
                return -ENOMEM;

        *machine = name;
        return 0;
}
1300
1301 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1302 _cleanup_free_ char *cgroup = NULL;
1303 int r;
1304
1305 assert(machine);
1306
1307 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1308 if (r < 0)
1309 return r;
1310
1311 return cg_path_get_machine_name(cgroup, machine);
1312 }
1313
/* Extracts a session ID from a cgroup path. After skipping leading
 * slices and an optional user component, the next component must end
 * in ".session"; the part in front of that suffix is stored, newly
 * allocated, in *session. Returns -ENOENT if there is no such
 * component and -ENOMEM on allocation failure. */
int cg_path_get_session(const char *path, char **session) {
        const char *start, *end;
        char *id;

        assert(path);
        assert(session);

        /* Neither slices nor the user name are of interest here */
        start = skip_user(skip_slices(path));

        end = strchrnul(start, '/');
        if (end - start < 8 || memcmp(end - 8, ".session", 8) != 0)
                return -ENOENT;

        id = strndup(start, end - start - 8);
        if (!id)
                return -ENOMEM;

        *session = id;
        return 0;
}
1340
1341 int cg_pid_get_session(pid_t pid, char **session) {
1342 _cleanup_free_ char *cgroup = NULL;
1343 int r;
1344
1345 assert(session);
1346
1347 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1348 if (r < 0)
1349 return r;
1350
1351 return cg_path_get_session(cgroup, session);
1352 }
1353
/* Extracts the owning UID from a cgroup path. After skipping leading
 * slices, the next component must end in ".user"; the part in front
 * of that suffix is parsed with parse_uid() into *uid. Returns
 * -ENOENT if there is no such component, otherwise the result of
 * parse_uid(). */
int cg_path_get_owner_uid(const char *path, uid_t *uid) {
        const char *start, *end;
        char *text;

        assert(path);
        assert(uid);

        /* Slices in front are not interesting */
        start = skip_slices(path);

        end = strchrnul(start, '/');
        if (end - start < 5 || memcmp(end - 5, ".user", 5) != 0)
                return -ENOENT;

        text = strndupa(start, end - start - 5);
        if (!text)
                return -ENOMEM;

        return parse_uid(text, uid);
}
1376
1377 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1378 _cleanup_free_ char *cgroup = NULL;
1379 int r;
1380
1381 assert(uid);
1382
1383 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1384 if (r < 0)
1385 return r;
1386
1387 return cg_path_get_owner_uid(cgroup, uid);
1388 }
1389
/* Extracts the innermost slice from a cgroup path: of the leading
 * run of path components that end in ".slice" (and are longer than
 * that suffix), the last one is stored, newly allocated, in *slice.
 * Returns 0 on success, -ENOENT if the path does not start with a
 * slice component, and -ENOMEM on allocation failure. */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last = NULL;
        size_t last_len = 0;
        char *copy;

        assert(p);
        assert(slice);

        for (;;) {
                size_t len;

                p += strspn(p, "/");

                len = strcspn(p, "/");
                if (len <= 6 || memcmp(p + len - 6, ".slice", 6) != 0)
                        break;

                /* Remember this one and look at the next component */
                last = p;
                last_len = len;

                p += len;
        }

        if (!last)
                return -ENOENT;

        copy = strndup(last, last_len);
        if (!copy)
                return -ENOMEM;

        *slice = copy;
        return 0;
}
1423
1424 int cg_pid_get_slice(pid_t pid, char **slice) {
1425 _cleanup_free_ char *cgroup = NULL;
1426 int r;
1427
1428 assert(slice);
1429
1430 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1431 if (r < 0)
1432 return r;
1433
1434 return cg_path_get_slice(cgroup, slice);
1435 }
1436
/* Derives the controller name from an attribute name, i.e. the part
 * in front of the first dot.
 *
 * Returns 1 and stores a newly allocated controller name in
 * *controller if the attribute contains a dot, 0 with *controller
 * set to NULL if it does not, -EINVAL if the attribute is not a safe
 * file name or the prefix is not a valid controller name, and
 * -ENOMEM on allocation failure. */
int cg_controller_from_attr(const char *attr, char **controller) {
        const char *dot;
        char *prefix;

        assert(attr);
        assert(controller);

        if (!filename_is_safe(attr))
                return -EINVAL;

        dot = strchr(attr, '.');
        if (!dot) {
                *controller = NULL;
                return 0;
        }

        prefix = strndup(attr, dot - attr);
        if (!prefix)
                return -ENOMEM;

        if (cg_controller_is_valid(prefix, false)) {
                *controller = prefix;
                return 1;
        }

        free(prefix);
        return -EINVAL;
}
1465
/* Minimal escaping for names that are to be used as file names in
 * the cgroup tree: a name is prefixed with '_' if it is empty, if it
 * starts with '_' or '.', if it is one of "notify_on_release",
 * "release_agent" or "tasks", or if the part in front of its last
 * dot is "cgroup" or names an existing hierarchy below
 * /sys/fs/cgroup. Hence, when reading names back it is sufficient to
 * drop one leading underscore, if there is one.
 *
 * Unlike the result of cg_unescape(), the returned string is newly
 * allocated and needs free()! NULL is returned on allocation
 * failure. */
char *cg_escape(const char *p) {
        const char *dot;

        if (p[0] == 0 ||
            p[0] == '_' ||
            p[0] == '.' ||
            streq(p, "notify_on_release") ||
            streq(p, "release_agent") ||
            streq(p, "tasks"))
                return strappend("_", p);

        dot = strrchr(p, '.');
        if (dot) {

                if (dot - p == 6 && memcmp(p, "cgroup", 6) == 0)
                        return strappend("_", p);
                else {
                        char *prefix;

                        /* Does the prefix name a known controller? */
                        prefix = strndupa(p, dot - p);

                        if (check_hierarchy(prefix) >= 0)
                                return strappend("_", p);
                }
        }

        return strdup(p);
}
1510
/* Undoes cg_escape() by skipping one leading underscore, if there is
 * one.
 *
 * Unlike the result of cg_escape(), the returned pointer points into
 * the argument and must not be passed to free()! */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1522
/* The characters a controller name may consist of */
#define CONTROLLER_VALID                        \
        "0123456789"                            \
        "abcdefghijklmnopqrstuvwxyz"            \
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"            \
        "_"

/* Checks whether 'p' is a syntactically valid controller name: not
 * NULL, not empty, not starting with '_', made up of CONTROLLER_VALID
 * characters only and no longer than FILENAME_MAX. If 'allow_named'
 * is true, a leading "name=" is accepted and not counted as part of
 * the name. */
bool cg_controller_is_valid(const char *p, bool allow_named) {
        size_t len;

        if (!p)
                return false;

        if (allow_named) {
                const char *rest;

                rest = startswith(p, "name=");
                if (rest)
                        p = rest;
        }

        if (*p == 0 || *p == '_')
                return false;

        /* Length of the leading run of permitted characters; it has
         * to cover the whole string */
        len = strspn(p, CONTROLLER_VALID);
        if (p[len] != 0)
                return false;

        return len <= FILENAME_MAX;
}
1553
1554 int cg_slice_to_path(const char *unit, char **ret) {
1555 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1556 const char *dash;
1557
1558 assert(unit);
1559 assert(ret);
1560
1561 if (!unit_name_is_valid(unit, false))
1562 return -EINVAL;
1563
1564 if (!endswith(unit, ".slice"))
1565 return -EINVAL;
1566
1567 p = unit_name_to_prefix(unit);
1568 if (!p)
1569 return -ENOMEM;
1570
1571 dash = strchr(p, '-');
1572 while (dash) {
1573 _cleanup_free_ char *escaped = NULL;
1574 char n[dash - p + sizeof(".slice")];
1575
1576 strcpy(stpncpy(n, p, dash - p), ".slice");
1577
1578 if (!unit_name_is_valid(n, false))
1579 return -EINVAL;
1580
1581 escaped = cg_escape(n);
1582 if (!escaped)
1583 return -ENOMEM;
1584
1585 if (!strextend(&s, escaped, "/", NULL))
1586 return -ENOMEM;
1587
1588 dash = strchr(dash+1, '-');
1589 }
1590
1591 e = cg_escape(unit);
1592 if (!e)
1593 return -ENOMEM;
1594
1595 if (!strextend(&s, e, NULL))
1596 return -ENOMEM;
1597
1598 *ret = s;
1599 s = NULL;
1600
1601 return 0;
1602 }
1603
1604 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1605 _cleanup_free_ char *p = NULL;
1606 int r;
1607
1608 r = cg_get_path(controller, path, attribute, &p);
1609 if (r < 0)
1610 return r;
1611
1612 return write_string_file(p, value);
1613 }
1614
/* Controller names, as a list of NUL-terminated strings that ends
 * with an empty string. The order matters: the *_with_mask()
 * functions and cg_mask_supported() walk this list while shifting a
 * bit that starts at 1, so the entry at position i is tied to bit
 * (1 << i) of a CGroupControllerMask. */
static const char mask_names[] =
        "cpu\0"         /* 1 << 0 */
        "cpuacct\0"     /* 1 << 1 */
        "blkio\0"       /* 1 << 2 */
        "memory\0"      /* 1 << 3 */
        "devices\0";    /* 1 << 4 */
1621
1622 int cg_create_with_mask(CGroupControllerMask mask, const char *path) {
1623 CGroupControllerMask bit = 1;
1624 const char *n;
1625 int r;
1626
1627 /* This one will create a cgroup in our private tree, but also
1628 * duplicate it in the trees specified in mask, and remove it
1629 * in all others */
1630
1631 /* First create the cgroup in our own hierarchy. */
1632 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1633 if (r < 0)
1634 return r;
1635
1636 /* Then, do the same in the other hierarchies */
1637 NULSTR_FOREACH(n, mask_names) {
1638 if (bit & mask)
1639 cg_create(n, path);
1640 else
1641 cg_trim(n, path, true);
1642
1643 bit <<= 1;
1644 }
1645
1646 return r;
1647 }
1648
1649 int cg_attach_with_mask(CGroupControllerMask mask, const char *path, pid_t pid) {
1650 CGroupControllerMask bit = 1;
1651 const char *n;
1652 int r;
1653
1654 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1655
1656 NULSTR_FOREACH(n, mask_names) {
1657 if (bit & mask)
1658 cg_attach(n, path, pid);
1659 else {
1660 char prefix[strlen(path) + 1], *slash;
1661
1662 /* OK, this one is a bit harder... Now we need
1663 * to add to the closest parent cgroup we
1664 * can find */
1665 strcpy(prefix, path);
1666 while ((slash = strrchr(prefix, '/'))) {
1667 int q;
1668 *slash = 0;
1669
1670 q = cg_attach(n, prefix, pid);
1671 if (q >= 0)
1672 break;
1673 }
1674 }
1675
1676 bit <<= 1;
1677 }
1678
1679 return r;
1680 }
1681
1682 int cg_migrate_with_mask(CGroupControllerMask mask, const char *from, const char *to) {
1683 CGroupControllerMask bit = 1;
1684 const char *n;
1685 int r;
1686
1687 if (path_equal(from, to))
1688 return 0;
1689
1690 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1691
1692 NULSTR_FOREACH(n, mask_names) {
1693 if (bit & mask)
1694 cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, to, n, to, false, false);
1695 else {
1696 char prefix[strlen(to) + 1], *slash;
1697
1698 strcpy(prefix, to);
1699 while ((slash = strrchr(prefix, '/'))) {
1700 int q;
1701
1702 *slash = 0;
1703
1704 q = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, to, n, prefix, false, false);
1705 if (q >= 0)
1706 break;
1707 }
1708 }
1709
1710 bit <<= 1;
1711 }
1712
1713 return r;
1714 }
1715
1716 int cg_trim_with_mask(CGroupControllerMask mask, const char *path, bool delete_root) {
1717 CGroupControllerMask bit = 1;
1718 const char *n;
1719 int r;
1720
1721 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1722 if (r < 0)
1723 return r;
1724
1725 NULSTR_FOREACH(n, mask_names) {
1726 if (bit & mask)
1727 cg_trim(n, path, delete_root);
1728
1729 bit <<= 1;
1730 }
1731
1732 return r;
1733 }
1734
1735 CGroupControllerMask cg_mask_supported(void) {
1736 CGroupControllerMask bit = 1, mask = 0;
1737 const char *n;
1738
1739 NULSTR_FOREACH(n, mask_names) {
1740 if (check_hierarchy(n) >= 0)
1741 mask |= bit;
1742
1743 bit <<= 1;
1744 }
1745
1746 return mask;
1747 }