1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "extract-word.h"
34 #include "formats-util.h"
35 #include "login-util.h"
38 #include "path-util.h"
39 #include "process-util.h"
42 #include "string-util.h"
43 #include "unit-name.h"
45 #include "cgroup-util.h"
47 int cg_enumerate_processes(const char *controller
, const char *path
, FILE **_f
) {
48 _cleanup_free_
char *fs
= NULL
;
54 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
66 int cg_read_pid(FILE *f
, pid_t
*_pid
) {
69 /* Note that the cgroup.procs might contain duplicates! See
70 * cgroups.txt for details. */
76 if (fscanf(f
, "%lu", &ul
) != 1) {
81 return errno
? -errno
: -EIO
;
91 int cg_enumerate_subgroups(const char *controller
, const char *path
, DIR **_d
) {
92 _cleanup_free_
char *fs
= NULL
;
98 /* This is not recursive! */
100 r
= cg_get_path(controller
, path
, NULL
, &fs
);
112 int cg_read_subgroup(DIR *d
, char **fn
) {
118 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
121 if (de
->d_type
!= DT_DIR
)
124 if (streq(de
->d_name
, ".") ||
125 streq(de
->d_name
, ".."))
128 b
= strdup(de
->d_name
);
139 int cg_rmdir(const char *controller
, const char *path
) {
140 _cleanup_free_
char *p
= NULL
;
143 r
= cg_get_path(controller
, path
, NULL
, &p
);
148 if (r
< 0 && errno
!= ENOENT
)
154 int cg_kill(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, Set
*s
) {
155 _cleanup_set_free_ Set
*allocated_set
= NULL
;
162 /* This goes through the tasks list and kills them all. This
163 * is repeated until no further processes are added to the
164 * tasks list, to properly handle forking processes */
167 s
= allocated_set
= set_new(NULL
);
175 _cleanup_fclose_
FILE *f
= NULL
;
179 r
= cg_enumerate_processes(controller
, path
, &f
);
181 if (ret
>= 0 && r
!= -ENOENT
)
187 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
189 if (ignore_self
&& pid
== my_pid
)
192 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
195 /* If we haven't killed this process yet, kill
197 if (kill(pid
, sig
) < 0) {
198 if (ret
>= 0 && errno
!= ESRCH
)
201 if (sigcont
&& sig
!= SIGKILL
)
202 (void) kill(pid
, SIGCONT
);
210 r
= set_put(s
, PID_TO_PTR(pid
));
226 /* To avoid racing against processes which fork
227 * quicker than we can kill them we repeat this until
228 * no new pids need to be killed. */
235 int cg_kill_recursive(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, bool rem
, Set
*s
) {
236 _cleanup_set_free_ Set
*allocated_set
= NULL
;
237 _cleanup_closedir_
DIR *d
= NULL
;
245 s
= allocated_set
= set_new(NULL
);
250 ret
= cg_kill(controller
, path
, sig
, sigcont
, ignore_self
, s
);
252 r
= cg_enumerate_subgroups(controller
, path
, &d
);
254 if (ret
>= 0 && r
!= -ENOENT
)
260 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
261 _cleanup_free_
char *p
= NULL
;
263 p
= strjoin(path
, "/", fn
, NULL
);
268 r
= cg_kill_recursive(controller
, p
, sig
, sigcont
, ignore_self
, rem
, s
);
269 if (r
!= 0 && ret
>= 0)
273 if (ret
>= 0 && r
< 0)
277 r
= cg_rmdir(controller
, path
);
278 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
285 int cg_migrate(const char *cfrom
, const char *pfrom
, const char *cto
, const char *pto
, bool ignore_self
) {
287 _cleanup_set_free_ Set
*s
= NULL
;
303 _cleanup_fclose_
FILE *f
= NULL
;
307 r
= cg_enumerate_processes(cfrom
, pfrom
, &f
);
309 if (ret
>= 0 && r
!= -ENOENT
)
315 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
317 /* This might do weird stuff if we aren't a
318 * single-threaded program. However, we
319 * luckily know we are not */
320 if (ignore_self
&& pid
== my_pid
)
323 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
326 /* Ignore kernel threads. Since they can only
327 * exist in the root cgroup, we only check for
330 (isempty(pfrom
) || path_equal(pfrom
, "/")) &&
331 is_kernel_thread(pid
) > 0)
334 r
= cg_attach(cto
, pto
, pid
);
336 if (ret
>= 0 && r
!= -ESRCH
)
343 r
= set_put(s
, PID_TO_PTR(pid
));
363 int cg_migrate_recursive(
371 _cleanup_closedir_
DIR *d
= NULL
;
380 ret
= cg_migrate(cfrom
, pfrom
, cto
, pto
, ignore_self
);
382 r
= cg_enumerate_subgroups(cfrom
, pfrom
, &d
);
384 if (ret
>= 0 && r
!= -ENOENT
)
390 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
391 _cleanup_free_
char *p
= NULL
;
393 p
= strjoin(pfrom
, "/", fn
, NULL
);
398 r
= cg_migrate_recursive(cfrom
, p
, cto
, pto
, ignore_self
, rem
);
399 if (r
!= 0 && ret
>= 0)
403 if (r
< 0 && ret
>= 0)
407 r
= cg_rmdir(cfrom
, pfrom
);
408 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
415 int cg_migrate_recursive_fallback(
430 r
= cg_migrate_recursive(cfrom
, pfrom
, cto
, pto
, ignore_self
, rem
);
432 char prefix
[strlen(pto
) + 1];
434 /* This didn't work? Then let's try all prefixes of the destination */
436 PATH_FOREACH_PREFIX(prefix
, pto
) {
439 q
= cg_migrate_recursive(cfrom
, pfrom
, cto
, prefix
, ignore_self
, rem
);
448 static const char *controller_to_dirname(const char *controller
) {
453 /* Converts a controller name to the directory name below
454 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
455 * just cuts off the name= prefix used for named
456 * hierarchies, if it is specified. */
458 e
= startswith(controller
, "name=");
465 static int join_path_legacy(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
472 dn
= controller_to_dirname(controller
);
474 if (isempty(path
) && isempty(suffix
))
475 t
= strappend("/sys/fs/cgroup/", dn
);
476 else if (isempty(path
))
477 t
= strjoin("/sys/fs/cgroup/", dn
, "/", suffix
, NULL
);
478 else if (isempty(suffix
))
479 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, NULL
);
481 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, "/", suffix
, NULL
);
489 static int join_path_unified(const char *path
, const char *suffix
, char **fs
) {
494 if (isempty(path
) && isempty(suffix
))
495 t
= strdup("/sys/fs/cgroup");
496 else if (isempty(path
))
497 t
= strappend("/sys/fs/cgroup/", suffix
);
498 else if (isempty(suffix
))
499 t
= strappend("/sys/fs/cgroup/", path
);
501 t
= strjoin("/sys/fs/cgroup/", path
, "/", suffix
, NULL
);
509 int cg_get_path(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
517 /* If no controller is specified, we return the path
518 * *below* the controllers, without any prefix. */
520 if (!path
&& !suffix
)
528 t
= strjoin(path
, "/", suffix
, NULL
);
532 *fs
= path_kill_slashes(t
);
536 if (!cg_controller_is_valid(controller
))
539 unified
= cg_unified();
544 r
= join_path_unified(path
, suffix
, fs
);
546 r
= join_path_legacy(controller
, path
, suffix
, fs
);
550 path_kill_slashes(*fs
);
554 static int controller_is_accessible(const char *controller
) {
559 /* Checks whether a specific controller is accessible,
560 * i.e. its hierarchy mounted. In the unified hierarchy all
561 * controllers are considered accessible, except for the named
564 if (!cg_controller_is_valid(controller
))
567 unified
= cg_unified();
571 /* We don't support named hierarchies if we are using
572 * the unified hierarchy. */
574 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
))
577 if (startswith(controller
, "name="))
583 dn
= controller_to_dirname(controller
);
584 cc
= strjoina("/sys/fs/cgroup/", dn
);
586 if (laccess(cc
, F_OK
) < 0)
593 int cg_get_path_and_check(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
599 /* Check if the specified controller is actually accessible */
600 r
= controller_is_accessible(controller
);
604 return cg_get_path(controller
, path
, suffix
, fs
);
607 static int trim_cb(const char *path
, const struct stat
*sb
, int typeflag
, struct FTW
*ftwbuf
) {
612 if (typeflag
!= FTW_DP
)
615 if (ftwbuf
->level
< 1)
622 int cg_trim(const char *controller
, const char *path
, bool delete_root
) {
623 _cleanup_free_
char *fs
= NULL
;
628 r
= cg_get_path(controller
, path
, NULL
, &fs
);
633 if (nftw(fs
, trim_cb
, 64, FTW_DEPTH
|FTW_MOUNT
|FTW_PHYS
) != 0) {
643 if (rmdir(fs
) < 0 && errno
!= ENOENT
)
650 int cg_create(const char *controller
, const char *path
) {
651 _cleanup_free_
char *fs
= NULL
;
654 r
= cg_get_path_and_check(controller
, path
, NULL
, &fs
);
658 r
= mkdir_parents(fs
, 0755);
662 if (mkdir(fs
, 0755) < 0) {
673 int cg_create_and_attach(const char *controller
, const char *path
, pid_t pid
) {
678 r
= cg_create(controller
, path
);
682 q
= cg_attach(controller
, path
, pid
);
686 /* This does not remove the cgroup on failure */
690 int cg_attach(const char *controller
, const char *path
, pid_t pid
) {
691 _cleanup_free_
char *fs
= NULL
;
692 char c
[DECIMAL_STR_MAX(pid_t
) + 2];
698 r
= cg_get_path_and_check(controller
, path
, "cgroup.procs", &fs
);
705 snprintf(c
, sizeof(c
), PID_FMT
"\n", pid
);
707 return write_string_file(fs
, c
, 0);
710 int cg_attach_fallback(const char *controller
, const char *path
, pid_t pid
) {
717 r
= cg_attach(controller
, path
, pid
);
719 char prefix
[strlen(path
) + 1];
721 /* This didn't work? Then let's try all prefixes of
724 PATH_FOREACH_PREFIX(prefix
, path
) {
727 q
= cg_attach(controller
, prefix
, pid
);
736 int cg_set_group_access(
737 const char *controller
,
743 _cleanup_free_
char *fs
= NULL
;
746 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
749 if (mode
!= MODE_INVALID
)
752 r
= cg_get_path(controller
, path
, NULL
, &fs
);
756 return chmod_and_chown(fs
, mode
, uid
, gid
);
759 int cg_set_task_access(
760 const char *controller
,
766 _cleanup_free_
char *fs
= NULL
, *procs
= NULL
;
771 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
774 if (mode
!= MODE_INVALID
)
777 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
781 r
= chmod_and_chown(fs
, mode
, uid
, gid
);
785 unified
= cg_unified();
791 /* Compatibility, Always keep values for "tasks" in sync with
793 if (cg_get_path(controller
, path
, "tasks", &procs
) >= 0)
794 (void) chmod_and_chown(procs
, mode
, uid
, gid
);
799 int cg_pid_get_path(const char *controller
, pid_t pid
, char **path
) {
800 _cleanup_fclose_
FILE *f
= NULL
;
809 unified
= cg_unified();
814 if (!cg_controller_is_valid(controller
))
817 controller
= SYSTEMD_CGROUP_CONTROLLER
;
819 cs
= strlen(controller
);
822 fs
= procfs_file_alloca(pid
, "cgroup");
825 return errno
== ENOENT
? -ESRCH
: -errno
;
827 FOREACH_LINE(line
, f
, return -errno
) {
833 e
= startswith(line
, "0:");
843 const char *word
, *state
;
846 l
= strchr(line
, ':');
856 FOREACH_WORD_SEPARATOR(word
, k
, l
, ",", state
) {
857 if (k
== cs
&& memcmp(word
, controller
, cs
) == 0) {
878 int cg_install_release_agent(const char *controller
, const char *agent
) {
879 _cleanup_free_
char *fs
= NULL
, *contents
= NULL
;
885 unified
= cg_unified();
888 if (unified
) /* doesn't apply to unified hierarchy */
891 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
895 r
= read_one_line_file(fs
, &contents
);
899 sc
= strstrip(contents
);
901 r
= write_string_file(fs
, agent
, 0);
904 } else if (!path_equal(sc
, agent
))
908 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
912 contents
= mfree(contents
);
913 r
= read_one_line_file(fs
, &contents
);
917 sc
= strstrip(contents
);
918 if (streq(sc
, "0")) {
919 r
= write_string_file(fs
, "1", 0);
932 int cg_uninstall_release_agent(const char *controller
) {
933 _cleanup_free_
char *fs
= NULL
;
936 unified
= cg_unified();
939 if (unified
) /* Doesn't apply to unified hierarchy */
942 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
946 r
= write_string_file(fs
, "0", 0);
952 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
956 r
= write_string_file(fs
, "", 0);
963 int cg_is_empty(const char *controller
, const char *path
) {
964 _cleanup_fclose_
FILE *f
= NULL
;
970 r
= cg_enumerate_processes(controller
, path
, &f
);
976 r
= cg_read_pid(f
, &pid
);
983 int cg_is_empty_recursive(const char *controller
, const char *path
) {
988 /* The root cgroup is always populated */
989 if (controller
&& (isempty(path
) || path_equal(path
, "/")))
992 unified
= cg_unified();
997 _cleanup_free_
char *populated
= NULL
, *t
= NULL
;
999 /* On the unified hierarchy we can check empty state
1000 * via the "cgroup.populated" attribute. */
1002 r
= cg_get_path(controller
, path
, "cgroup.populated", &populated
);
1006 r
= read_one_line_file(populated
, &t
);
1012 return streq(t
, "0");
1014 _cleanup_closedir_
DIR *d
= NULL
;
1017 r
= cg_is_empty(controller
, path
);
1021 r
= cg_enumerate_subgroups(controller
, path
, &d
);
1027 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
1028 _cleanup_free_
char *p
= NULL
;
1030 p
= strjoin(path
, "/", fn
, NULL
);
1035 r
= cg_is_empty_recursive(controller
, p
);
1046 int cg_split_spec(const char *spec
, char **controller
, char **path
) {
1047 char *t
= NULL
, *u
= NULL
;
1053 if (!path_is_safe(spec
))
1061 *path
= path_kill_slashes(t
);
1070 e
= strchr(spec
, ':');
1072 if (!cg_controller_is_valid(spec
))
1089 t
= strndup(spec
, e
-spec
);
1092 if (!cg_controller_is_valid(t
)) {
1106 if (!path_is_safe(u
) ||
1107 !path_is_absolute(u
)) {
1113 path_kill_slashes(u
);
1129 int cg_mangle_path(const char *path
, char **result
) {
1130 _cleanup_free_
char *c
= NULL
, *p
= NULL
;
1137 /* First, check if it already is a filesystem path */
1138 if (path_startswith(path
, "/sys/fs/cgroup")) {
1144 *result
= path_kill_slashes(t
);
1148 /* Otherwise, treat it as cg spec */
1149 r
= cg_split_spec(path
, &c
, &p
);
1153 return cg_get_path(c
?: SYSTEMD_CGROUP_CONTROLLER
, p
?: "/", NULL
, result
);
1156 int cg_get_root_path(char **path
) {
1162 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 1, &p
);
1166 e
= endswith(p
, "/" SPECIAL_INIT_SCOPE
);
1168 e
= endswith(p
, "/" SPECIAL_SYSTEM_SLICE
); /* legacy */
1170 e
= endswith(p
, "/system"); /* even more legacy */
1178 int cg_shift_path(const char *cgroup
, const char *root
, const char **shifted
) {
1179 _cleanup_free_
char *rt
= NULL
;
1187 /* If the root was specified let's use that, otherwise
1188 * let's determine it from PID 1 */
1190 r
= cg_get_root_path(&rt
);
1197 p
= path_startswith(cgroup
, root
);
1198 if (p
&& p
> cgroup
)
1206 int cg_pid_get_path_shifted(pid_t pid
, const char *root
, char **cgroup
) {
1207 _cleanup_free_
char *raw
= NULL
;
1214 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &raw
);
1218 r
= cg_shift_path(raw
, root
, &c
);
1238 int cg_path_decode_unit(const char *cgroup
, char **unit
){
1245 n
= strcspn(cgroup
, "/");
1249 c
= strndupa(cgroup
, n
);
1252 if (!unit_name_is_valid(c
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
1263 static bool valid_slice_name(const char *p
, size_t n
) {
1268 if (n
< strlen("x.slice"))
1271 if (memcmp(p
+ n
- 6, ".slice", 6) == 0) {
1277 c
= cg_unescape(buf
);
1279 return unit_name_is_valid(c
, UNIT_NAME_PLAIN
);
1285 static const char *skip_slices(const char *p
) {
1288 /* Skips over all slice assignments */
1293 p
+= strspn(p
, "/");
1295 n
= strcspn(p
, "/");
1296 if (!valid_slice_name(p
, n
))
1303 int cg_path_get_unit(const char *path
, char **ret
) {
1311 e
= skip_slices(path
);
1313 r
= cg_path_decode_unit(e
, &unit
);
1317 /* We skipped over the slices, don't accept any now */
1318 if (endswith(unit
, ".slice")) {
1327 int cg_pid_get_unit(pid_t pid
, char **unit
) {
1328 _cleanup_free_
char *cgroup
= NULL
;
1333 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1337 return cg_path_get_unit(cgroup
, unit
);
1341 * Skip session-*.scope, but require it to be there.
1343 static const char *skip_session(const char *p
) {
1349 p
+= strspn(p
, "/");
1351 n
= strcspn(p
, "/");
1352 if (n
< strlen("session-x.scope"))
1355 if (memcmp(p
, "session-", 8) == 0 && memcmp(p
+ n
- 6, ".scope", 6) == 0) {
1356 char buf
[n
- 8 - 6 + 1];
1358 memcpy(buf
, p
+ 8, n
- 8 - 6);
1361 /* Note that session scopes never need unescaping,
1362 * since they cannot conflict with the kernel's own
1363 * names, hence we don't need to call cg_unescape()
1366 if (!session_id_valid(buf
))
1370 p
+= strspn(p
, "/");
1378 * Skip user@*.service, but require it to be there.
1380 static const char *skip_user_manager(const char *p
) {
1386 p
+= strspn(p
, "/");
1388 n
= strcspn(p
, "/");
1389 if (n
< strlen("user@x.service"))
1392 if (memcmp(p
, "user@", 5) == 0 && memcmp(p
+ n
- 8, ".service", 8) == 0) {
1393 char buf
[n
- 5 - 8 + 1];
1395 memcpy(buf
, p
+ 5, n
- 5 - 8);
1398 /* Note that user manager services never need unescaping,
1399 * since they cannot conflict with the kernel's own
1400 * names, hence we don't need to call cg_unescape()
1403 if (parse_uid(buf
, NULL
) < 0)
1407 p
+= strspn(p
, "/");
1415 static const char *skip_user_prefix(const char *path
) {
1420 /* Skip slices, if there are any */
1421 e
= skip_slices(path
);
1423 /* Skip the user manager, if it's in the path now... */
1424 t
= skip_user_manager(e
);
1428 /* Alternatively skip the user session if it is in the path... */
1429 return skip_session(e
);
1432 int cg_path_get_user_unit(const char *path
, char **ret
) {
1438 t
= skip_user_prefix(path
);
1442 /* And from here on it looks pretty much the same as for a
1443 * system unit, hence let's use the same parser from here
1445 return cg_path_get_unit(t
, ret
);
1448 int cg_pid_get_user_unit(pid_t pid
, char **unit
) {
1449 _cleanup_free_
char *cgroup
= NULL
;
1454 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1458 return cg_path_get_user_unit(cgroup
, unit
);
1461 int cg_path_get_machine_name(const char *path
, char **machine
) {
1462 _cleanup_free_
char *u
= NULL
;
1466 r
= cg_path_get_unit(path
, &u
);
1470 sl
= strjoina("/run/systemd/machines/unit:", u
);
1471 return readlink_malloc(sl
, machine
);
1474 int cg_pid_get_machine_name(pid_t pid
, char **machine
) {
1475 _cleanup_free_
char *cgroup
= NULL
;
1480 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1484 return cg_path_get_machine_name(cgroup
, machine
);
1487 int cg_path_get_session(const char *path
, char **session
) {
1488 _cleanup_free_
char *unit
= NULL
;
1494 r
= cg_path_get_unit(path
, &unit
);
1498 start
= startswith(unit
, "session-");
1501 end
= endswith(start
, ".scope");
1506 if (!session_id_valid(start
))
1522 int cg_pid_get_session(pid_t pid
, char **session
) {
1523 _cleanup_free_
char *cgroup
= NULL
;
1526 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1530 return cg_path_get_session(cgroup
, session
);
1533 int cg_path_get_owner_uid(const char *path
, uid_t
*uid
) {
1534 _cleanup_free_
char *slice
= NULL
;
1540 r
= cg_path_get_slice(path
, &slice
);
1544 start
= startswith(slice
, "user-");
1547 end
= endswith(start
, ".slice");
1552 if (parse_uid(start
, uid
) < 0)
1558 int cg_pid_get_owner_uid(pid_t pid
, uid_t
*uid
) {
1559 _cleanup_free_
char *cgroup
= NULL
;
1562 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1566 return cg_path_get_owner_uid(cgroup
, uid
);
1569 int cg_path_get_slice(const char *p
, char **slice
) {
1570 const char *e
= NULL
;
1575 /* Finds the right-most slice unit from the beginning, but
1576 * stops before we come to the first non-slice unit. */
1581 p
+= strspn(p
, "/");
1583 n
= strcspn(p
, "/");
1584 if (!valid_slice_name(p
, n
)) {
1589 s
= strdup("-.slice");
1597 return cg_path_decode_unit(e
, slice
);
1605 int cg_pid_get_slice(pid_t pid
, char **slice
) {
1606 _cleanup_free_
char *cgroup
= NULL
;
1611 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1615 return cg_path_get_slice(cgroup
, slice
);
1618 int cg_path_get_user_slice(const char *p
, char **slice
) {
1623 t
= skip_user_prefix(p
);
1627 /* And now it looks pretty much the same as for a system
1628 * slice, so let's just use the same parser from here on. */
1629 return cg_path_get_slice(t
, slice
);
1632 int cg_pid_get_user_slice(pid_t pid
, char **slice
) {
1633 _cleanup_free_
char *cgroup
= NULL
;
1638 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1642 return cg_path_get_user_slice(cgroup
, slice
);
1645 char *cg_escape(const char *p
) {
1646 bool need_prefix
= false;
1648 /* This implements very minimal escaping for names to be used
1649 * as file names in the cgroup tree: any name which might
1650 * conflict with a kernel name or is prefixed with '_' is
1651 * prefixed with a '_'. That way, when reading cgroup names it
1652 * is sufficient to remove a single prefixing underscore if
1655 /* The return value of this function (unlike cg_unescape())
1661 streq(p
, "notify_on_release") ||
1662 streq(p
, "release_agent") ||
1663 streq(p
, "tasks") ||
1664 startswith(p
, "cgroup."))
1669 dot
= strrchr(p
, '.');
1674 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1677 n
= cgroup_controller_to_string(c
);
1682 if (memcmp(p
, n
, l
) != 0)
1692 return strappend("_", p
);
1697 char *cg_unescape(const char *p
) {
1700 /* The return value of this function (unlike cg_escape())
1701 * doesn't need free()! */
1709 #define CONTROLLER_VALID \
1713 bool cg_controller_is_valid(const char *p
) {
1719 s
= startswith(p
, "name=");
1723 if (*p
== 0 || *p
== '_')
1726 for (t
= p
; *t
; t
++)
1727 if (!strchr(CONTROLLER_VALID
, *t
))
1730 if (t
- p
> FILENAME_MAX
)
1736 int cg_slice_to_path(const char *unit
, char **ret
) {
1737 _cleanup_free_
char *p
= NULL
, *s
= NULL
, *e
= NULL
;
1744 if (streq(unit
, "-.slice")) {
1754 if (!unit_name_is_valid(unit
, UNIT_NAME_PLAIN
))
1757 if (!endswith(unit
, ".slice"))
1760 r
= unit_name_to_prefix(unit
, &p
);
1764 dash
= strchr(p
, '-');
1766 /* Don't allow initial dashes */
1771 _cleanup_free_
char *escaped
= NULL
;
1772 char n
[dash
- p
+ sizeof(".slice")];
1774 /* Don't allow trailing or double dashes */
1775 if (dash
[1] == 0 || dash
[1] == '-')
1778 strcpy(stpncpy(n
, p
, dash
- p
), ".slice");
1779 if (!unit_name_is_valid(n
, UNIT_NAME_PLAIN
))
1782 escaped
= cg_escape(n
);
1786 if (!strextend(&s
, escaped
, "/", NULL
))
1789 dash
= strchr(dash
+1, '-');
1792 e
= cg_escape(unit
);
1796 if (!strextend(&s
, e
, NULL
))
1805 int cg_set_attribute(const char *controller
, const char *path
, const char *attribute
, const char *value
) {
1806 _cleanup_free_
char *p
= NULL
;
1809 r
= cg_get_path(controller
, path
, attribute
, &p
);
1813 return write_string_file(p
, value
, 0);
1816 int cg_get_attribute(const char *controller
, const char *path
, const char *attribute
, char **ret
) {
1817 _cleanup_free_
char *p
= NULL
;
1820 r
= cg_get_path(controller
, path
, attribute
, &p
);
1824 return read_one_line_file(p
, ret
);
1827 int cg_create_everywhere(CGroupMask supported
, CGroupMask mask
, const char *path
) {
1831 /* This one will create a cgroup in our private tree, but also
1832 * duplicate it in the trees specified in mask, and remove it
1835 /* First create the cgroup in our own hierarchy. */
1836 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER
, path
);
1840 /* If we are in the unified hierarchy, we are done now */
1841 unified
= cg_unified();
1847 /* Otherwise, do the same in the other hierarchies */
1848 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1849 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1852 n
= cgroup_controller_to_string(c
);
1855 (void) cg_create(n
, path
);
1856 else if (supported
& bit
)
1857 (void) cg_trim(n
, path
, true);
1863 int cg_attach_everywhere(CGroupMask supported
, const char *path
, pid_t pid
, cg_migrate_callback_t path_callback
, void *userdata
) {
1867 r
= cg_attach(SYSTEMD_CGROUP_CONTROLLER
, path
, pid
);
1871 unified
= cg_unified();
1877 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1878 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1879 const char *p
= NULL
;
1881 if (!(supported
& bit
))
1885 p
= path_callback(bit
, userdata
);
1890 (void) cg_attach_fallback(cgroup_controller_to_string(c
), p
, pid
);
1896 int cg_attach_many_everywhere(CGroupMask supported
, const char *path
, Set
* pids
, cg_migrate_callback_t path_callback
, void *userdata
) {
1901 SET_FOREACH(pidp
, pids
, i
) {
1902 pid_t pid
= PTR_TO_PID(pidp
);
1905 q
= cg_attach_everywhere(supported
, path
, pid
, path_callback
, userdata
);
1906 if (q
< 0 && r
>= 0)
1913 int cg_migrate_everywhere(CGroupMask supported
, const char *from
, const char *to
, cg_migrate_callback_t to_callback
, void *userdata
) {
1917 if (!path_equal(from
, to
)) {
1918 r
= cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER
, from
, SYSTEMD_CGROUP_CONTROLLER
, to
, false, true);
1923 unified
= cg_unified();
1929 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1930 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1931 const char *p
= NULL
;
1933 if (!(supported
& bit
))
1937 p
= to_callback(bit
, userdata
);
1942 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER
, to
, cgroup_controller_to_string(c
), p
, false, false);
1948 int cg_trim_everywhere(CGroupMask supported
, const char *path
, bool delete_root
) {
1952 r
= cg_trim(SYSTEMD_CGROUP_CONTROLLER
, path
, delete_root
);
1956 unified
= cg_unified();
1962 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1963 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1965 if (!(supported
& bit
))
1968 (void) cg_trim(cgroup_controller_to_string(c
), path
, delete_root
);
1974 int cg_mask_supported(CGroupMask
*ret
) {
1975 CGroupMask mask
= 0;
1978 /* Determines the mask of supported cgroup controllers. Only
1979 * includes controllers we can make sense of and that are
1980 * actually accessible. */
1982 unified
= cg_unified();
1986 _cleanup_free_
char *root
= NULL
, *controllers
= NULL
, *path
= NULL
;
1989 /* In the unified hierarchy we can read the supported
1990 * and accessible controllers from the top-level
1991 * cgroup attribute */
1993 r
= cg_get_root_path(&root
);
1997 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, root
, "cgroup.controllers", &path
);
2001 r
= read_one_line_file(path
, &controllers
);
2007 _cleanup_free_
char *n
= NULL
;
2010 r
= extract_first_word(&c
, &n
, NULL
, 0);
2016 v
= cgroup_controller_from_string(n
);
2020 mask
|= CGROUP_CONTROLLER_TO_MASK(v
);
2023 /* Currently, we only support the memory and pids
2024 * controller in the unified hierarchy, mask
2025 * everything else off. */
2026 mask
&= CGROUP_MASK_MEMORY
| CGROUP_MASK_PIDS
;
2031 /* In the legacy hierarchy, we check which
2032 * hierarchies are mounted. */
2034 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2037 n
= cgroup_controller_to_string(c
);
2038 if (controller_is_accessible(n
) >= 0)
2039 mask
|= CGROUP_CONTROLLER_TO_MASK(c
);
2047 int cg_kernel_controllers(Set
*controllers
) {
2048 _cleanup_fclose_
FILE *f
= NULL
;
2052 assert(controllers
);
2054 /* Determines the full list of kernel-known controllers. Might
2055 * include controllers we don't actually support, arbitrary
2056 * named hierarchies and controllers that aren't currently
2057 * accessible (because not mounted). */
2059 f
= fopen("/proc/cgroups", "re");
2061 if (errno
== ENOENT
)
2066 /* Ignore the header line */
2067 (void) fgets(buf
, sizeof(buf
), f
);
2074 if (fscanf(f
, "%ms %*i %*i %i", &controller
, &enabled
) != 2) {
2079 if (ferror(f
) && errno
!= 0)
2090 if (!cg_controller_is_valid(controller
)) {
2095 r
= set_consume(controllers
, controller
);
2103 static thread_local
int unified_cache
= -1;
2105 int cg_unified(void) {
2108 /* Checks if we support the unified hierarchy. Returns an
2109 * error when the cgroup hierarchies aren't mounted yet or we
2110 * have any other trouble determining if the unified hierarchy
2113 if (unified_cache
>= 0)
2114 return unified_cache
;
2116 if (statfs("/sys/fs/cgroup/", &fs
) < 0)
2119 if (F_TYPE_EQUAL(fs
.f_type
, CGROUP_SUPER_MAGIC
))
2120 unified_cache
= true;
2121 else if (F_TYPE_EQUAL(fs
.f_type
, TMPFS_MAGIC
))
2122 unified_cache
= false;
2126 return unified_cache
;
2129 void cg_unified_flush(void) {
2133 int cg_enable_everywhere(CGroupMask supported
, CGroupMask mask
, const char *p
) {
2134 _cleanup_free_
char *fs
= NULL
;
2143 unified
= cg_unified();
2146 if (!unified
) /* on the legacy hierarchy there's no joining of controllers defined */
2149 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, p
, "cgroup.subtree_control", &fs
);
2153 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2154 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2157 if (!(supported
& bit
))
2160 n
= cgroup_controller_to_string(c
);
2162 char s
[1 + strlen(n
) + 1];
2164 s
[0] = mask
& bit
? '+' : '-';
2167 r
= write_string_file(fs
, s
, 0);
2169 log_debug_errno(r
, "Failed to enable controller %s for %s (%s): %m", n
, p
, fs
);
2176 bool cg_is_unified_wanted(void) {
2177 static thread_local
int wanted
= -1;
2180 /* If the hierarchy is already mounted, then follow whatever
2181 * was chosen for it. */
2182 unified
= cg_unified();
2186 /* Otherwise, let's see what the kernel command line has to
2187 * say. Since checking that is expensive, let's cache the
2192 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL
);
2194 return (wanted
= true);
2196 _cleanup_free_
char *value
= NULL
;
2198 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value
);
2202 return (wanted
= false);
2204 return (wanted
= parse_boolean(value
) > 0);
2208 bool cg_is_legacy_wanted(void) {
2209 return !cg_is_unified_wanted();
2212 int cg_cpu_shares_parse(const char *s
, uint64_t *ret
) {
2217 *ret
= CGROUP_CPU_SHARES_INVALID
;
2221 r
= safe_atou64(s
, &u
);
2225 if (u
< CGROUP_CPU_SHARES_MIN
|| u
> CGROUP_CPU_SHARES_MAX
)
2232 int cg_blkio_weight_parse(const char *s
, uint64_t *ret
) {
2237 *ret
= CGROUP_BLKIO_WEIGHT_INVALID
;
2241 r
= safe_atou64(s
, &u
);
2245 if (u
< CGROUP_BLKIO_WEIGHT_MIN
|| u
> CGROUP_BLKIO_WEIGHT_MAX
)
2252 static const char *cgroup_controller_table
[_CGROUP_CONTROLLER_MAX
] = {
2253 [CGROUP_CONTROLLER_CPU
] = "cpu",
2254 [CGROUP_CONTROLLER_CPUACCT
] = "cpuacct",
2255 [CGROUP_CONTROLLER_BLKIO
] = "blkio",
2256 [CGROUP_CONTROLLER_MEMORY
] = "memory",
2257 [CGROUP_CONTROLLER_DEVICES
] = "devices",
2258 [CGROUP_CONTROLLER_PIDS
] = "pids",
2259 [CGROUP_CONTROLLER_NET_CLS
] = "net_cls",
/* NOTE(review): presumably expands to the cgroup_controller_to_string() and
 * cgroup_controller_from_string() helpers called elsewhere in this file,
 * backed by cgroup_controller_table. The macro definition is not visible
 * here, so confirm against its header. */
2262 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller
, CGroupController
);