1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
/* Enumerates the processes of the cgroup (controller, path): resolves the
 * "cgroup.procs" attribute of that group through cg_get_path() and checks
 * the lookup result for a negative (error) value.
 * NOTE(review): _f presumably receives the opened stream - confirm. */
38 int cg_enumerate_processes(const char *controller
, const char *path
, FILE **_f
) {
47 if ((r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
)) < 0)
/* Enumerates the tasks of the cgroup (controller, path): resolves the
 * "tasks" attribute of that group through cg_get_path() and checks the
 * lookup result for a negative (error) value.
 * NOTE(review): _f presumably receives the opened stream - confirm. */
60 int cg_enumerate_tasks(const char *controller
, const char *path
, FILE **_f
) {
69 if ((r
= cg_get_path(controller
, path
, "tasks", &fs
)) < 0)
/* Reads one numeric entry from f with fscanf("%lu"). When the conversion
 * does not yield exactly one item, the error path can return -errno, or
 * -EIO if errno is not set.
 * NOTE(review): the parsed value is presumably stored through _pid -
 * confirm. */
82 int cg_read_pid(FILE *f
, pid_t
*_pid
) {
85 /* Note that the cgroup.procs might contain duplicates! See
86 * cgroups.txt for details. */
89 if (fscanf(f
, "%lu", &ul
) != 1) {
/* Fall back to -EIO so that a failure never looks like success */
94 return errno
? -errno
: -EIO
;
/* Enumerates the direct child groups of (controller, path): resolves the
 * group's directory with cg_get_path() (no suffix) and checks the lookup
 * result for a negative (error) value.
 * NOTE(review): _d presumably receives the opened directory - confirm. */
104 int cg_enumerate_subgroups(const char *controller
, const char *path
, DIR **_d
) {
113 /* This is not recursive! */
115 if ((r
= cg_get_path(controller
, path
, NULL
, &fs
)) < 0)
/* Walks the directory stream d with readdir(), testing each entry's
 * d_type against DT_DIR and its name against "." and "..", and
 * duplicating d_name with strdup().
 * NOTE(review): presumably non-directories and the dot entries are
 * skipped and the copy is handed back through fn - confirm. */
128 int cg_read_subgroup(DIR *d
, char **fn
) {
134 while ((de
= readdir(d
))) {
137 if (de
->d_type
!= DT_DIR
)
140 if (streq(de
->d_name
, ".") ||
141 streq(de
->d_name
, ".."))
/* The name is copied, since de belongs to the directory stream */
144 if (!(b
= strdup(de
->d_name
)))
/* Removes the directory of the cgroup (controller, path). The directory
 * is resolved with cg_get_path(); its "tasks" file is probed with
 * file_is_priv_sticky(). The final result is -errno when r < 0 and errno
 * is not ENOENT, and 0 otherwise, so an already missing directory is
 * not an error.
 * NOTE(review): honour_sticky presumably gates the sticky-bit check -
 * confirm. */
157 int cg_rmdir(const char *controller
, const char *path
, bool honour_sticky
) {
161 r
= cg_get_path(controller
, path
, NULL
, &p
);
168 /* If the sticky bit is set don't remove the directory */
170 tasks
= strappend(p
, "/tasks");
176 r
= file_is_priv_sticky(tasks
);
188 return (r
< 0 && errno
!= ENOENT
) ? -errno
: 0;
/* Sends sig to the processes of the cgroup (controller, path). Pids are
 * obtained with cg_enumerate_processes() and cg_read_pid(), signalled
 * with kill() and recorded in the Set s with set_put(); set_get() is
 * used to recognise a pid that is already in the set. A set created
 * here with set_new() is tracked in allocated_set and released with
 * set_free() at the end. The caller's own pid (my_pid) is tested
 * together with ignore_self. -ENOENT from the enumeration and ESRCH from
 * kill() are special-cased in the conditions involving ret.
 * NOTE(review): sigcont presumably requests a follow-up SIGCONT, and the
 * return value presumably tells whether anything was killed - confirm. */
191 int cg_kill(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, Set
*s
) {
196 Set
*allocated_set
= NULL
;
202 /* This goes through the tasks list and kills them all. This
203 * is repeated until no further processes are added to the
204 * tasks list, to properly handle forking processes */
207 if (!(s
= allocated_set
= set_new(trivial_hash_func
, trivial_compare_func
)))
216 if ((r
= cg_enumerate_processes(controller
, path
, &f
)) < 0) {
217 if (ret
>= 0 && r
!= -ENOENT
)
223 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
225 if (pid
== my_pid
&& ignore_self
)
228 if (set_get(s
, LONG_TO_PTR(pid
)) == LONG_TO_PTR(pid
))
231 /* If we haven't killed this process yet, kill
233 if (kill(pid
, sig
) < 0) {
234 if (ret
>= 0 && errno
!= ESRCH
)
236 } else if (ret
== 0) {
246 if ((r
= set_put(s
, LONG_TO_PTR(pid
))) < 0) {
264 /* To avoid racing against processes which fork
265 * quicker than we can kill them we repeat this until
266 * no new pids need to be killed. */
272 set_free(allocated_set
);
/* Recursive variant of cg_kill(): first runs cg_kill() on
 * (controller, path), then iterates over the child groups with
 * cg_enumerate_subgroups() and cg_read_subgroup(), builds each child
 * path as "<path>/<name>" with asprintf() and calls itself on it,
 * passing the same Set s down. A set created here with set_new() is
 * tracked in allocated_set and released with set_free() at the end.
 * cg_rmdir() is invoked on path with honour_sticky set to true.
 * -ENOENT from the subgroup enumeration is special-cased.
 * NOTE(review): rem presumably decides whether the cg_rmdir() call
 * happens - confirm. */
280 int cg_kill_recursive(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, bool rem
, Set
*s
) {
284 Set
*allocated_set
= NULL
;
291 if (!(s
= allocated_set
= set_new(trivial_hash_func
, trivial_compare_func
)))
/* Handle the processes of this group first, then descend */
294 ret
= cg_kill(controller
, path
, sig
, sigcont
, ignore_self
, s
);
296 if ((r
= cg_enumerate_subgroups(controller
, path
, &d
)) < 0) {
297 if (ret
>= 0 && r
!= -ENOENT
)
303 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
306 r
= asprintf(&p
, "%s/%s", path
, fn
);
316 r
= cg_kill_recursive(controller
, p
, sig
, sigcont
, ignore_self
, rem
, s
);
319 if (r
!= 0 && ret
>= 0)
323 if (r
< 0 && ret
>= 0)
327 if ((r
= cg_rmdir(controller
, path
, true)) < 0) {
339 set_free(allocated_set
);
/* Runs up to 15 rounds of cg_kill_recursive() on (controller, path),
 * each with sigcont and ignore_self set to true and without a
 * caller-provided set, sleeping 200ms with usleep() between rounds. A
 * result <= 0 from a round is checked right after the call.
 * NOTE(review): the per-round choice of sig is presumably the
 * SIGTERM/check/SIGKILL schedule described below - confirm. */
344 int cg_kill_recursive_and_wait(const char *controller
, const char *path
, bool rem
) {
350 /* This safely kills all processes; first it sends a SIGTERM,
351 * then checks 8 times after 200ms whether the group is now
352 * empty, then kills everything that is left with SIGKILL and
353 * finally checks 5 times after 200ms each whether the group
354 * is finally empty. */
356 for (i
= 0; i
< 15; i
++) {
366 if ((r
= cg_kill_recursive(controller
, path
, sig
, true, true, rem
, NULL
)) <= 0)
369 usleep(200 * USEC_PER_MSEC
);
/* Moves the tasks of the cgroup "from" into the cgroup "to" within the
 * given controller. Pids are read with cg_enumerate_tasks() and
 * cg_read_pid(), attached to the target with cg_attach() and recorded
 * in a Set created with set_new(); set_get() is used to recognise a pid
 * that is already in the set. The caller's own pid (my_pid) is tested
 * together with ignore_self. -ENOENT from the enumeration and -ESRCH
 * from cg_attach() are special-cased in the conditions involving ret.
 * NOTE(review): the return value presumably tells whether anything was
 * migrated - confirm. */
375 int cg_migrate(const char *controller
, const char *from
, const char *to
, bool ignore_self
) {
386 if (!(s
= set_new(trivial_hash_func
, trivial_compare_func
)))
395 if ((r
= cg_enumerate_tasks(controller
, from
, &f
)) < 0) {
396 if (ret
>= 0 && r
!= -ENOENT
)
402 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
404 /* This might do weird stuff if we aren't a
405 * single-threaded program. However, we
406 * luckily know we are not */
407 if (pid
== my_pid
&& ignore_self
)
410 if (set_get(s
, LONG_TO_PTR(pid
)) == LONG_TO_PTR(pid
))
413 if ((r
= cg_attach(controller
, to
, pid
)) < 0) {
414 if (ret
>= 0 && r
!= -ESRCH
)
421 if ((r
= set_put(s
, LONG_TO_PTR(pid
))) < 0) {
/* Recursive variant of cg_migrate(): first runs cg_migrate() from
 * "from" to "to", then iterates over the child groups of "from" with
 * cg_enumerate_subgroups() and cg_read_subgroup(), builds each child
 * path as "<from>/<name>" with asprintf() and calls itself on it with
 * the same target "to". cg_rmdir() is invoked on "from" with
 * honour_sticky set to true. -ENOENT from the subgroup enumeration is
 * special-cased.
 * NOTE(review): rem presumably decides whether the cg_rmdir() call
 * happens - confirm. */
450 int cg_migrate_recursive(const char *controller
, const char *from
, const char *to
, bool ignore_self
, bool rem
) {
/* Handle the tasks of this group first, then descend */
459 ret
= cg_migrate(controller
, from
, to
, ignore_self
);
461 if ((r
= cg_enumerate_subgroups(controller
, from
, &d
)) < 0) {
462 if (ret
>= 0 && r
!= -ENOENT
)
467 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
470 r
= asprintf(&p
, "%s/%s", from
, fn
);
480 r
= cg_migrate_recursive(controller
, p
, to
, ignore_self
, rem
);
483 if (r
!= 0 && ret
>= 0)
487 if (r
< 0 && ret
>= 0)
491 if ((r
= cg_rmdir(controller
, from
, true)) < 0) {
/* Builds the file system path below "/sys/fs/cgroup/" for a controller,
 * with an optional group path and an optional attribute suffix. A
 * per-thread flag (good) guards a path_is_mount_point() check of
 * "/sys/fs/cgroup"; the failure path of that check returns the negative
 * r, or -ENOENT when r is not negative. The controller is tested with
 * isempty(), against SYSTEMD_CGROUP_CONTROLLER and for a "name=" prefix
 * to choose the hierarchy directory p. The four join() calls assemble
 * the path with both, either or neither of path and suffix, and the
 * result is cleaned up with path_kill_slashes().
 * NOTE(review): the result is presumably stored in *fs and owned by the
 * caller - confirm. */
505 int cg_get_path(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
508 static __thread
bool good
= false;
513 if (_unlikely_(!good
)) {
516 r
= path_is_mount_point("/sys/fs/cgroup", false);
518 return r
< 0 ? r
: -ENOENT
;
520 /* Cache this to save a few stat()s */
524 if (isempty(controller
))
527 /* This is a very minimal lookup from controller names to
528 * paths. Since we have mounted most hierarchies ourselves
529 * this should be kinda safe, but eventually we might want to
530 * extend this to have a fallback to actually check
531 * /proc/mounts. Might need caching then. */
533 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
))
535 else if (startswith(controller
, "name="))
/* Path and suffix */
541 t
= join("/sys/fs/cgroup/", p
, "/", path
, "/", suffix
, NULL
);
/* Path only */
543 t
= join("/sys/fs/cgroup/", p
, "/", path
, NULL
);
/* Suffix only */
545 t
= join("/sys/fs/cgroup/", p
, "/", suffix
, NULL
);
/* Neither path nor suffix */
547 t
= join("/sys/fs/cgroup/", p
, NULL
);
552 path_kill_slashes(t
);
/* nftw() callback used by cg_trim(). It tests typeflag against FTW_DP
 * and ftwbuf->level against 1, then derives "<path>/tasks" with
 * strappend() and records in is_sticky whether file_is_priv_sticky()
 * reports a positive result for it.
 * NOTE(review): presumably only non-sticky directories below the root
 * level are removed - confirm. */
558 static int trim_cb(const char *path
, const struct stat
*sb
, int typeflag
, struct FTW
*ftwbuf
) {
562 if (typeflag
!= FTW_DP
)
565 if (ftwbuf
->level
< 1)
568 p
= strappend(path
, "/tasks");
574 is_sticky
= file_is_priv_sticky(p
) > 0;
/* Trims the cgroup tree rooted at (controller, path). The directory is
 * resolved with cg_get_path() and walked with nftw() using trim_cb,
 * depth-first (FTW_DEPTH), without crossing mount points (FTW_MOUNT)
 * and without following symlinks (FTW_PHYS), with at most 64 open
 * descriptors. A failed walk sets r to -errno, or -EIO if errno is not
 * set. Afterwards the root's own "tasks" file is probed with
 * file_is_priv_sticky() and rmdir() is called on the root, where ENOENT
 * is not treated as a failure.
 * NOTE(review): delete_root and is_sticky presumably gate the final
 * rmdir() - confirm. */
584 int cg_trim(const char *controller
, const char *path
, bool delete_root
) {
591 r
= cg_get_path(controller
, path
, NULL
, &fs
);
596 if (nftw(fs
, trim_cb
, 64, FTW_DEPTH
|FTW_MOUNT
|FTW_PHYS
) < 0)
597 r
= errno
? -errno
: -EIO
;
603 p
= strappend(fs
, "/tasks");
609 is_sticky
= file_is_priv_sticky(p
) > 0;
613 if (rmdir(fs
) < 0 && errno
!= ENOENT
) {
/* Deletes the cgroup (controller, path): determines the parent path
 * with parent_of_path() and calls cg_migrate_recursive() from path to
 * that parent with ignore_self set to false and rem set to true.
 * Returns 0 if the migration reported -ENOENT, otherwise its result. */
624 int cg_delete(const char *controller
, const char *path
) {
631 if ((r
= parent_of_path(path
, &parent
)) < 0)
634 r
= cg_migrate_recursive(controller
, path
, parent
, false, true);
/* A group that is already gone counts as success */
637 return r
== -ENOENT
? 0 : r
;
/* Creates the directory of the cgroup (controller, path): resolves it
 * with cg_get_path(), creates missing parents with mkdir_parents() and
 * then the directory itself with mkdir(), both with mode 0755. A
 * successful mkdir() and a failure with errno EEXIST are handled as
 * separate cases.
 * NOTE(review): the return value presumably distinguishes "created"
 * from "already existed" - confirm. */
640 int cg_create(const char *controller
, const char *path
) {
647 if ((r
= cg_get_path(controller
, path
, NULL
, &fs
)) < 0)
650 r
= mkdir_parents(fs
, 0755);
653 if (mkdir(fs
, 0755) >= 0)
655 else if (errno
== EEXIST
)
/* Attaches pid to the cgroup (controller, path): resolves the group's
 * "tasks" file with cg_get_path(), formats the pid as a decimal number
 * followed by a newline into the buffer c and writes that line to the
 * file with write_one_line_file(). */
666 int cg_attach(const char *controller
, const char *path
, pid_t pid
) {
675 if ((r
= cg_get_path(controller
, path
, "tasks", &fs
)) < 0)
/* pid_t is cast to unsigned long to match the "%lu" conversion */
681 snprintf(c
, sizeof(c
), "%lu\n", (unsigned long) pid
);
684 r
= write_one_line_file(fs
, c
);
/* Convenience wrapper: calls cg_create() for (controller, path) and
 * then cg_attach() to put pid into that group. Each call's result is
 * checked for a negative (error) value. As noted below, a group that
 * was created is left in place when a later step fails. */
690 int cg_create_and_attach(const char *controller
, const char *path
, pid_t pid
) {
697 if ((r
= cg_create(controller
, path
)) < 0)
700 if ((q
= cg_attach(controller
, path
, pid
)) < 0)
703 /* This does not remove the cgroup on failure */
/* Sets access mode and ownership of the directory of the cgroup
 * (controller, path): resolves the directory with cg_get_path() and
 * applies mode, uid and gid with chmod_and_chown(). A mode of
 * (mode_t) -1 is tested for explicitly.
 * NOTE(review): (mode_t) -1 presumably means "leave the mode alone", and
 * a given mode is presumably masked before use - confirm. */
708 int cg_set_group_access(const char *controller
, const char *path
, mode_t mode
, uid_t uid
, gid_t gid
) {
715 if (mode
!= (mode_t
) -1)
718 r
= cg_get_path(controller
, path
, NULL
, &fs
);
722 r
= chmod_and_chown(fs
, mode
, uid
, gid
);
/* Sets access mode, ownership and sticky bit of the "tasks" file of the
 * cgroup (controller, path). The values (mode_t) -1, (uid_t) -1,
 * (gid_t) -1 and a negative sticky are tested for together at the top.
 * The file is resolved with cg_get_path(). When both mode and sticky are
 * given, S_ISVTX is or-ed into mode if sticky is non-zero. When only
 * one of them is given, the missing part is taken from st.st_mode:
 * either the current mode with only the sticky bit replaced, or the
 * current non-permission bits combined with the new mode. The result is
 * applied with chmod_and_chown().
 * NOTE(review): the all-unset case presumably returns early, and st is
 * presumably filled by a stat call on the file - confirm. */
728 int cg_set_task_access(const char *controller
, const char *path
, mode_t mode
, uid_t uid
, gid_t gid
, int sticky
) {
735 if (mode
== (mode_t
) -1 && uid
== (uid_t
) -1 && gid
== (gid_t
) -1 && sticky
< 0)
738 if (mode
!= (mode_t
) -1)
741 r
= cg_get_path(controller
, path
, "tasks", &fs
);
745 if (sticky
>= 0 && mode
!= (mode_t
) -1)
746 /* Both mode and sticky param are passed */
747 mode
|= (sticky
? S_ISVTX
: 0);
748 else if ((sticky
>= 0 && mode
== (mode_t
) -1) ||
749 (mode
!= (mode_t
) -1 && sticky
< 0)) {
752 /* Only one param is passed, hence read the current
753 * mode from the file itself */
761 if (mode
== (mode_t
) -1)
762 /* No mode set, we just shall set the sticky bit */
763 mode
= (st
.st_mode
& ~S_ISVTX
) | (sticky
? S_ISVTX
: 0);
765 /* Only mode set, leave sticky bit untouched */
766 mode
= (st
.st_mode
& ~0777) | mode
;
769 r
= chmod_and_chown(fs
, mode
, uid
, gid
);
/* Looks up the cgroup path of pid for the given controller by reading
 * "/proc/<pid>/cgroup". One error path maps ENOENT to -ESRCH and any
 * other errno to its negative value. Lines are read with fgets(); a
 * failed read sets r to -errno, or -EIO if errno is not set. In each
 * line the first ':' is located with strchr(), a controller-length
 * prefix is compared against controller with strncmp(), and the text
 * starting at l + strlen(controller) + 1 is duplicated with strdup().
 * NOTE(review): l is presumably advanced past the ':' before the
 * comparison, and the copy is presumably returned through *path -
 * confirm. */
775 int cg_get_by_pid(const char *controller
, pid_t pid
, char **path
) {
789 if (asprintf(&fs
, "/proc/%lu/cgroup", (unsigned long) pid
) < 0)
/* A missing /proc entry is reported as "no such process" */
796 return errno
== ENOENT
? -ESRCH
: -errno
;
798 cs
= strlen(controller
);
805 if (!(fgets(line
, sizeof(line
), f
))) {
809 r
= errno
? -errno
: -EIO
;
815 if (!(l
= strchr(line
, ':')))
819 if (strncmp(l
, controller
, cs
) != 0)
825 if (!(p
= strdup(l
+ cs
+ 1))) {
/* Installs agent as the release agent of the controller's hierarchy.
 * The "release_agent" file is resolved with cg_get_path() (no group
 * path), read with read_one_line_file() and stripped with strstrip().
 * One branch writes "<agent>\n" with write_one_line_file(); another
 * branch is taken when the stripped contents differ from agent. The
 * same read-and-strip is then done for "notify_on_release": contents of
 * "0" lead to writing "1\n", and contents other than "1" are handled as
 * a separate case.
 * NOTE(review): the write presumably happens only when no agent is set
 * yet, and the mismatch branches presumably return an error - confirm. */
843 int cg_install_release_agent(const char *controller
, const char *agent
) {
844 char *fs
= NULL
, *contents
= NULL
, *line
= NULL
, *sc
;
850 if ((r
= cg_get_path(controller
, NULL
, "release_agent", &fs
)) < 0)
853 if ((r
= read_one_line_file(fs
, &contents
)) < 0)
856 sc
= strstrip(contents
);
859 if (asprintf(&line
, "%s\n", agent
) < 0) {
864 if ((r
= write_one_line_file(fs
, line
)) < 0)
867 } else if (!streq(sc
, agent
)) {
874 if ((r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
)) < 0)
879 if ((r
= read_one_line_file(fs
, &contents
)) < 0)
882 sc
= strstrip(contents
);
884 if (streq(sc
, "0")) {
885 if ((r
= write_one_line_file(fs
, "1\n")) < 0)
889 } else if (!streq(sc
, "1")) {
/* Checks whether the cgroup (controller, path) has no tasks. The tasks
 * are enumerated with cg_enumerate_tasks(); if that fails with -ENOENT
 * the function returns 1, and any other error is returned as is. Pids
 * are then read with cg_read_pid(), and the caller's own pid (getpid())
 * is tested together with ignore_self.
 * NOTE(review): presumably returns 0 as soon as a pid that counts is
 * found, and 1 otherwise - confirm. */
903 int cg_is_empty(const char *controller
, const char *path
, bool ignore_self
) {
912 if ((r
= cg_enumerate_tasks(controller
, path
, &f
)) < 0)
/* A group that does not exist is reported as empty */
913 return r
== -ENOENT
? 1 : r
;
915 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
917 if (ignore_self
&& pid
== getpid())
/* Recursive variant of cg_is_empty(): first runs cg_is_empty() on
 * (controller, path) and checks for a result <= 0, then iterates over
 * the child groups with cg_enumerate_subgroups() and cg_read_subgroup(),
 * builds each child path as "<path>/<name>" with asprintf() and calls
 * itself on it. If the subgroup enumeration fails with -ENOENT the
 * function returns 1; any other enumeration error is returned as is.
 * NOTE(review): a result <= 0 from any level presumably ends the search
 * and is returned - confirm. */
932 int cg_is_empty_recursive(const char *controller
, const char *path
, bool ignore_self
) {
940 if ((r
= cg_is_empty(controller
, path
, ignore_self
)) <= 0)
943 if ((r
= cg_enumerate_subgroups(controller
, path
, &d
)) < 0)
/* A group that does not exist is reported as empty */
944 return r
== -ENOENT
? 1 : r
;
946 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
949 r
= asprintf(&p
, "%s/%s", path
, fn
);
957 r
= cg_is_empty_recursive(controller
, p
, ignore_self
);
/* Splits a cgroup specification string into a controller part and a
 * path part. At least one of the output pointers controller and path
 * must be non-NULL (asserted). Three shapes are visible: spec is
 * duplicated whole with strdup(); a spec without ':' is tested for
 * containing '/' or being empty before being duplicated; and a spec
 * with ':' is split into the text before the colon (strndup(), after a
 * memchr() search for '/' in that prefix) and the text after it
 * (strdup()).
 * NOTE(review): a '/' in the controller part and an empty spec are
 * presumably rejected, and t and u are presumably returned through
 * *controller and *path - confirm. */
975 int cg_split_spec(const char *spec
, char **controller
, char **path
) {
977 char *t
= NULL
, *u
= NULL
;
980 assert(controller
|| path
);
985 if (!(t
= strdup(spec
)))
997 if (!(e
= strchr(spec
, ':'))) {
999 if (strchr(spec
, '/') || spec
[0] == 0)
1003 if (!(t
= strdup(spec
)))
1017 memchr(spec
, '/', e
-spec
))
1021 if (!(t
= strndup(spec
, e
-spec
)))
1025 if (!(u
= strdup(e
+1))) {
/* Combines controller and path into a "<controller>:<path>" string
 * allocated with asprintf() and stored in *spec. Beforehand the input
 * is tested: whether path is absolute, whether controller is empty, and
 * whether controller contains ':' or '/'.
 * NOTE(review): input failing those tests is presumably rejected with
 * an error - confirm. */
1039 int cg_join_spec(const char *controller
, const char *path
, char **spec
) {
1043 if (!path_is_absolute(path
) ||
1044 controller
[0] == 0 ||
1045 strchr(controller
, ':') ||
1046 strchr(controller
, '/'))
1049 if (asprintf(spec
, "%s:%s", controller
, path
) < 0)
/* Turns path into a cgroup file system path. If path is absolute,
 * starts with "/sys/fs/cgroup" and exists (access() with F_OK), it is
 * duplicated with strdup(). Otherwise it is parsed as a cgroup spec
 * with cg_split_spec() and resolved with cg_get_path(), using
 * SYSTEMD_CGROUP_CONTROLLER when no controller was given and "/" when
 * no path was given; that call stores its output in result.
 * NOTE(review): the duplicated string is presumably also returned
 * through *result - confirm. */
1055 int cg_fix_path(const char *path
, char **result
) {
1062 /* First check if it already is a filesystem path */
1063 if (path_is_absolute(path
) &&
1064 path_startswith(path
, "/sys/fs/cgroup") &&
1065 access(path
, F_OK
) >= 0) {
1067 if (!(t
= strdup(path
)))
1074 /* Otherwise treat it as cg spec */
1075 if ((r
= cg_split_spec(path
, &c
, &p
)) < 0)
1078 r
= cg_get_path(c
? c
: SYSTEMD_CGROUP_CONTROLLER
, p
? p
: "/", NULL
, result
);
1085 int cg_get_user_path(char **path
) {
1090 /* Figure out the place to put user cgroups below. We use the
1091 * same as PID 1 has but with the "/system" suffix replaced by
1094 if (cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER
, 1, &root
) < 0)
1095 p
= strdup("/user");
1097 if (endswith(root
, "/system"))
1098 root
[strlen(root
) - 7] = 0;
1099 else if (streq(root
, "/"))
1102 p
= strappend(root
, "/user");