1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
35 #include "formats-util.h"
36 #include "process-util.h"
37 #include "path-util.h"
38 #include "unit-name.h"
42 #include "login-util.h"
43 #include "cgroup-util.h"
45 int cg_enumerate_processes(const char *controller
, const char *path
, FILE **_f
) {
46 _cleanup_free_
char *fs
= NULL
;
52 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
64 int cg_read_pid(FILE *f
, pid_t
*_pid
) {
67 /* Note that the cgroup.procs might contain duplicates! See
68 * cgroups.txt for details. */
74 if (fscanf(f
, "%lu", &ul
) != 1) {
79 return errno
? -errno
: -EIO
;
89 int cg_enumerate_subgroups(const char *controller
, const char *path
, DIR **_d
) {
90 _cleanup_free_
char *fs
= NULL
;
96 /* This is not recursive! */
98 r
= cg_get_path(controller
, path
, NULL
, &fs
);
110 int cg_read_subgroup(DIR *d
, char **fn
) {
116 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
119 if (de
->d_type
!= DT_DIR
)
122 if (streq(de
->d_name
, ".") ||
123 streq(de
->d_name
, ".."))
126 b
= strdup(de
->d_name
);
137 int cg_rmdir(const char *controller
, const char *path
) {
138 _cleanup_free_
char *p
= NULL
;
141 r
= cg_get_path(controller
, path
, NULL
, &p
);
146 if (r
< 0 && errno
!= ENOENT
)
152 int cg_kill(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, Set
*s
) {
153 _cleanup_set_free_ Set
*allocated_set
= NULL
;
160 /* This goes through the tasks list and kills them all. This
161 * is repeated until no further processes are added to the
162 * tasks list, to properly handle forking processes */
165 s
= allocated_set
= set_new(NULL
);
173 _cleanup_fclose_
FILE *f
= NULL
;
177 r
= cg_enumerate_processes(controller
, path
, &f
);
179 if (ret
>= 0 && r
!= -ENOENT
)
185 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
187 if (ignore_self
&& pid
== my_pid
)
190 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
193 /* If we haven't killed this process yet, kill
195 if (kill(pid
, sig
) < 0) {
196 if (ret
>= 0 && errno
!= ESRCH
)
199 if (sigcont
&& sig
!= SIGKILL
)
200 (void) kill(pid
, SIGCONT
);
208 r
= set_put(s
, PID_TO_PTR(pid
));
224 /* To avoid racing against processes which fork
225 * quicker than we can kill them we repeat this until
226 * no new pids need to be killed. */
233 int cg_kill_recursive(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, bool rem
, Set
*s
) {
234 _cleanup_set_free_ Set
*allocated_set
= NULL
;
235 _cleanup_closedir_
DIR *d
= NULL
;
243 s
= allocated_set
= set_new(NULL
);
248 ret
= cg_kill(controller
, path
, sig
, sigcont
, ignore_self
, s
);
250 r
= cg_enumerate_subgroups(controller
, path
, &d
);
252 if (ret
>= 0 && r
!= -ENOENT
)
258 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
259 _cleanup_free_
char *p
= NULL
;
261 p
= strjoin(path
, "/", fn
, NULL
);
266 r
= cg_kill_recursive(controller
, p
, sig
, sigcont
, ignore_self
, rem
, s
);
267 if (r
!= 0 && ret
>= 0)
271 if (ret
>= 0 && r
< 0)
275 r
= cg_rmdir(controller
, path
);
276 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
283 int cg_migrate(const char *cfrom
, const char *pfrom
, const char *cto
, const char *pto
, bool ignore_self
) {
285 _cleanup_set_free_ Set
*s
= NULL
;
301 _cleanup_fclose_
FILE *f
= NULL
;
305 r
= cg_enumerate_processes(cfrom
, pfrom
, &f
);
307 if (ret
>= 0 && r
!= -ENOENT
)
313 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
315 /* This might do weird stuff if we aren't a
316 * single-threaded program. However, we
317 * luckily know we are not */
318 if (ignore_self
&& pid
== my_pid
)
321 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
324 /* Ignore kernel threads. Since they can only
325 * exist in the root cgroup, we only check for
328 (isempty(pfrom
) || path_equal(pfrom
, "/")) &&
329 is_kernel_thread(pid
) > 0)
332 r
= cg_attach(cto
, pto
, pid
);
334 if (ret
>= 0 && r
!= -ESRCH
)
341 r
= set_put(s
, PID_TO_PTR(pid
));
361 int cg_migrate_recursive(
369 _cleanup_closedir_
DIR *d
= NULL
;
378 ret
= cg_migrate(cfrom
, pfrom
, cto
, pto
, ignore_self
);
380 r
= cg_enumerate_subgroups(cfrom
, pfrom
, &d
);
382 if (ret
>= 0 && r
!= -ENOENT
)
388 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
389 _cleanup_free_
char *p
= NULL
;
391 p
= strjoin(pfrom
, "/", fn
, NULL
);
396 r
= cg_migrate_recursive(cfrom
, p
, cto
, pto
, ignore_self
, rem
);
397 if (r
!= 0 && ret
>= 0)
401 if (r
< 0 && ret
>= 0)
405 r
= cg_rmdir(cfrom
, pfrom
);
406 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
413 int cg_migrate_recursive_fallback(
428 r
= cg_migrate_recursive(cfrom
, pfrom
, cto
, pto
, ignore_self
, rem
);
430 char prefix
[strlen(pto
) + 1];
432 /* This didn't work? Then let's try all prefixes of the destination */
434 PATH_FOREACH_PREFIX(prefix
, pto
) {
437 q
= cg_migrate_recursive(cfrom
, pfrom
, cto
, prefix
, ignore_self
, rem
);
446 static const char *controller_to_dirname(const char *controller
) {
451 /* Converts a controller name to the directory name below
452 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
453 * just cuts off the name= prefix used for named
454 * hierarchies, if it is specified. */
456 e
= startswith(controller
, "name=");
463 static int join_path_legacy(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
470 dn
= controller_to_dirname(controller
);
472 if (isempty(path
) && isempty(suffix
))
473 t
= strappend("/sys/fs/cgroup/", dn
);
474 else if (isempty(path
))
475 t
= strjoin("/sys/fs/cgroup/", dn
, "/", suffix
, NULL
);
476 else if (isempty(suffix
))
477 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, NULL
);
479 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, "/", suffix
, NULL
);
487 static int join_path_unified(const char *path
, const char *suffix
, char **fs
) {
492 if (isempty(path
) && isempty(suffix
))
493 t
= strdup("/sys/fs/cgroup");
494 else if (isempty(path
))
495 t
= strappend("/sys/fs/cgroup/", suffix
);
496 else if (isempty(suffix
))
497 t
= strappend("/sys/fs/cgroup/", path
);
499 t
= strjoin("/sys/fs/cgroup/", path
, "/", suffix
, NULL
);
507 int cg_get_path(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
515 /* If no controller is specified, we return the path
516 * *below* the controllers, without any prefix. */
518 if (!path
&& !suffix
)
526 t
= strjoin(path
, "/", suffix
, NULL
);
530 *fs
= path_kill_slashes(t
);
534 if (!cg_controller_is_valid(controller
))
537 unified
= cg_unified();
542 r
= join_path_unified(path
, suffix
, fs
);
544 r
= join_path_legacy(controller
, path
, suffix
, fs
);
548 path_kill_slashes(*fs
);
552 static int controller_is_accessible(const char *controller
) {
557 /* Checks whether a specific controller is accessible,
558 * i.e. its hierarchy mounted. In the unified hierarchy all
559 * controllers are considered accessible, except for the named
562 if (!cg_controller_is_valid(controller
))
565 unified
= cg_unified();
569 /* We don't support named hierarchies if we are using
570 * the unified hierarchy. */
572 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
))
575 if (startswith(controller
, "name="))
581 dn
= controller_to_dirname(controller
);
582 cc
= strjoina("/sys/fs/cgroup/", dn
);
584 if (laccess(cc
, F_OK
) < 0)
591 int cg_get_path_and_check(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
597 /* Check if the specified controller is actually accessible */
598 r
= controller_is_accessible(controller
);
602 return cg_get_path(controller
, path
, suffix
, fs
);
605 static int trim_cb(const char *path
, const struct stat
*sb
, int typeflag
, struct FTW
*ftwbuf
) {
610 if (typeflag
!= FTW_DP
)
613 if (ftwbuf
->level
< 1)
620 int cg_trim(const char *controller
, const char *path
, bool delete_root
) {
621 _cleanup_free_
char *fs
= NULL
;
626 r
= cg_get_path(controller
, path
, NULL
, &fs
);
631 if (nftw(fs
, trim_cb
, 64, FTW_DEPTH
|FTW_MOUNT
|FTW_PHYS
) != 0) {
641 if (rmdir(fs
) < 0 && errno
!= ENOENT
)
648 int cg_create(const char *controller
, const char *path
) {
649 _cleanup_free_
char *fs
= NULL
;
652 r
= cg_get_path_and_check(controller
, path
, NULL
, &fs
);
656 r
= mkdir_parents(fs
, 0755);
660 if (mkdir(fs
, 0755) < 0) {
671 int cg_create_and_attach(const char *controller
, const char *path
, pid_t pid
) {
676 r
= cg_create(controller
, path
);
680 q
= cg_attach(controller
, path
, pid
);
684 /* This does not remove the cgroup on failure */
688 int cg_attach(const char *controller
, const char *path
, pid_t pid
) {
689 _cleanup_free_
char *fs
= NULL
;
690 char c
[DECIMAL_STR_MAX(pid_t
) + 2];
696 r
= cg_get_path_and_check(controller
, path
, "cgroup.procs", &fs
);
703 snprintf(c
, sizeof(c
), PID_FMT
"\n", pid
);
705 return write_string_file(fs
, c
, 0);
708 int cg_attach_fallback(const char *controller
, const char *path
, pid_t pid
) {
715 r
= cg_attach(controller
, path
, pid
);
717 char prefix
[strlen(path
) + 1];
719 /* This didn't work? Then let's try all prefixes of
722 PATH_FOREACH_PREFIX(prefix
, path
) {
725 q
= cg_attach(controller
, prefix
, pid
);
734 int cg_set_group_access(
735 const char *controller
,
741 _cleanup_free_
char *fs
= NULL
;
744 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
747 if (mode
!= MODE_INVALID
)
750 r
= cg_get_path(controller
, path
, NULL
, &fs
);
754 return chmod_and_chown(fs
, mode
, uid
, gid
);
757 int cg_set_task_access(
758 const char *controller
,
764 _cleanup_free_
char *fs
= NULL
, *procs
= NULL
;
769 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
772 if (mode
!= MODE_INVALID
)
775 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
779 r
= chmod_and_chown(fs
, mode
, uid
, gid
);
783 unified
= cg_unified();
789 /* Compatibility, Always keep values for "tasks" in sync with
791 if (cg_get_path(controller
, path
, "tasks", &procs
) >= 0)
792 (void) chmod_and_chown(procs
, mode
, uid
, gid
);
797 int cg_pid_get_path(const char *controller
, pid_t pid
, char **path
) {
798 _cleanup_fclose_
FILE *f
= NULL
;
807 unified
= cg_unified();
812 if (!cg_controller_is_valid(controller
))
815 controller
= SYSTEMD_CGROUP_CONTROLLER
;
817 cs
= strlen(controller
);
820 fs
= procfs_file_alloca(pid
, "cgroup");
823 return errno
== ENOENT
? -ESRCH
: -errno
;
825 FOREACH_LINE(line
, f
, return -errno
) {
831 e
= startswith(line
, "0:");
841 const char *word
, *state
;
844 l
= strchr(line
, ':');
854 FOREACH_WORD_SEPARATOR(word
, k
, l
, ",", state
) {
855 if (k
== cs
&& memcmp(word
, controller
, cs
) == 0) {
876 int cg_install_release_agent(const char *controller
, const char *agent
) {
877 _cleanup_free_
char *fs
= NULL
, *contents
= NULL
;
883 unified
= cg_unified();
886 if (unified
) /* doesn't apply to unified hierarchy */
889 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
893 r
= read_one_line_file(fs
, &contents
);
897 sc
= strstrip(contents
);
899 r
= write_string_file(fs
, agent
, 0);
902 } else if (!path_equal(sc
, agent
))
906 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
910 contents
= mfree(contents
);
911 r
= read_one_line_file(fs
, &contents
);
915 sc
= strstrip(contents
);
916 if (streq(sc
, "0")) {
917 r
= write_string_file(fs
, "1", 0);
930 int cg_uninstall_release_agent(const char *controller
) {
931 _cleanup_free_
char *fs
= NULL
;
934 unified
= cg_unified();
937 if (unified
) /* Doesn't apply to unified hierarchy */
940 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
944 r
= write_string_file(fs
, "0", 0);
950 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
954 r
= write_string_file(fs
, "", 0);
961 int cg_is_empty(const char *controller
, const char *path
) {
962 _cleanup_fclose_
FILE *f
= NULL
;
968 r
= cg_enumerate_processes(controller
, path
, &f
);
974 r
= cg_read_pid(f
, &pid
);
981 int cg_is_empty_recursive(const char *controller
, const char *path
) {
986 /* The root cgroup is always populated */
987 if (controller
&& (isempty(path
) || path_equal(path
, "/")))
990 unified
= cg_unified();
995 _cleanup_free_
char *populated
= NULL
, *t
= NULL
;
997 /* On the unified hierarchy we can check empty state
998 * via the "cgroup.populated" attribute. */
1000 r
= cg_get_path(controller
, path
, "cgroup.populated", &populated
);
1004 r
= read_one_line_file(populated
, &t
);
1010 return streq(t
, "0");
1012 _cleanup_closedir_
DIR *d
= NULL
;
1015 r
= cg_is_empty(controller
, path
);
1019 r
= cg_enumerate_subgroups(controller
, path
, &d
);
1025 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
1026 _cleanup_free_
char *p
= NULL
;
1028 p
= strjoin(path
, "/", fn
, NULL
);
1033 r
= cg_is_empty_recursive(controller
, p
);
1044 int cg_split_spec(const char *spec
, char **controller
, char **path
) {
1045 char *t
= NULL
, *u
= NULL
;
1051 if (!path_is_safe(spec
))
1059 *path
= path_kill_slashes(t
);
1068 e
= strchr(spec
, ':');
1070 if (!cg_controller_is_valid(spec
))
1087 t
= strndup(spec
, e
-spec
);
1090 if (!cg_controller_is_valid(t
)) {
1104 if (!path_is_safe(u
) ||
1105 !path_is_absolute(u
)) {
1111 path_kill_slashes(u
);
1127 int cg_mangle_path(const char *path
, char **result
) {
1128 _cleanup_free_
char *c
= NULL
, *p
= NULL
;
1135 /* First, check if it already is a filesystem path */
1136 if (path_startswith(path
, "/sys/fs/cgroup")) {
1142 *result
= path_kill_slashes(t
);
1146 /* Otherwise, treat it as cg spec */
1147 r
= cg_split_spec(path
, &c
, &p
);
1151 return cg_get_path(c
?: SYSTEMD_CGROUP_CONTROLLER
, p
?: "/", NULL
, result
);
1154 int cg_get_root_path(char **path
) {
1160 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 1, &p
);
1164 e
= endswith(p
, "/" SPECIAL_INIT_SCOPE
);
1166 e
= endswith(p
, "/" SPECIAL_SYSTEM_SLICE
); /* legacy */
1168 e
= endswith(p
, "/system"); /* even more legacy */
1176 int cg_shift_path(const char *cgroup
, const char *root
, const char **shifted
) {
1177 _cleanup_free_
char *rt
= NULL
;
1185 /* If the root was specified let's use that, otherwise
1186 * let's determine it from PID 1 */
1188 r
= cg_get_root_path(&rt
);
1195 p
= path_startswith(cgroup
, root
);
1196 if (p
&& p
> cgroup
)
1204 int cg_pid_get_path_shifted(pid_t pid
, const char *root
, char **cgroup
) {
1205 _cleanup_free_
char *raw
= NULL
;
1212 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &raw
);
1216 r
= cg_shift_path(raw
, root
, &c
);
1236 int cg_path_decode_unit(const char *cgroup
, char **unit
){
1243 n
= strcspn(cgroup
, "/");
1247 c
= strndupa(cgroup
, n
);
1250 if (!unit_name_is_valid(c
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
1261 static bool valid_slice_name(const char *p
, size_t n
) {
1266 if (n
< strlen("x.slice"))
1269 if (memcmp(p
+ n
- 6, ".slice", 6) == 0) {
1275 c
= cg_unescape(buf
);
1277 return unit_name_is_valid(c
, UNIT_NAME_PLAIN
);
1283 static const char *skip_slices(const char *p
) {
1286 /* Skips over all slice assignments */
1291 p
+= strspn(p
, "/");
1293 n
= strcspn(p
, "/");
1294 if (!valid_slice_name(p
, n
))
1301 int cg_path_get_unit(const char *path
, char **ret
) {
1309 e
= skip_slices(path
);
1311 r
= cg_path_decode_unit(e
, &unit
);
1315 /* We skipped over the slices, don't accept any now */
1316 if (endswith(unit
, ".slice")) {
1325 int cg_pid_get_unit(pid_t pid
, char **unit
) {
1326 _cleanup_free_
char *cgroup
= NULL
;
1331 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1335 return cg_path_get_unit(cgroup
, unit
);
1339 * Skip session-*.scope, but require it to be there.
1341 static const char *skip_session(const char *p
) {
1347 p
+= strspn(p
, "/");
1349 n
= strcspn(p
, "/");
1350 if (n
< strlen("session-x.scope"))
1353 if (memcmp(p
, "session-", 8) == 0 && memcmp(p
+ n
- 6, ".scope", 6) == 0) {
1354 char buf
[n
- 8 - 6 + 1];
1356 memcpy(buf
, p
+ 8, n
- 8 - 6);
1359 /* Note that session scopes never need unescaping,
1360 * since they cannot conflict with the kernel's own
1361 * names, hence we don't need to call cg_unescape()
1364 if (!session_id_valid(buf
))
1368 p
+= strspn(p
, "/");
1376 * Skip user@*.service, but require it to be there.
1378 static const char *skip_user_manager(const char *p
) {
1384 p
+= strspn(p
, "/");
1386 n
= strcspn(p
, "/");
1387 if (n
< strlen("user@x.service"))
1390 if (memcmp(p
, "user@", 5) == 0 && memcmp(p
+ n
- 8, ".service", 8) == 0) {
1391 char buf
[n
- 5 - 8 + 1];
1393 memcpy(buf
, p
+ 5, n
- 5 - 8);
1396 /* Note that user manager services never need unescaping,
1397 * since they cannot conflict with the kernel's own
1398 * names, hence we don't need to call cg_unescape()
1401 if (parse_uid(buf
, NULL
) < 0)
1405 p
+= strspn(p
, "/");
1413 static const char *skip_user_prefix(const char *path
) {
1418 /* Skip slices, if there are any */
1419 e
= skip_slices(path
);
1421 /* Skip the user manager, if it's in the path now... */
1422 t
= skip_user_manager(e
);
1426 /* Alternatively skip the user session if it is in the path... */
1427 return skip_session(e
);
1430 int cg_path_get_user_unit(const char *path
, char **ret
) {
1436 t
= skip_user_prefix(path
);
1440 /* And from here on it looks pretty much the same as for a
1441 * system unit, hence let's use the same parser from here
1443 return cg_path_get_unit(t
, ret
);
1446 int cg_pid_get_user_unit(pid_t pid
, char **unit
) {
1447 _cleanup_free_
char *cgroup
= NULL
;
1452 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1456 return cg_path_get_user_unit(cgroup
, unit
);
1459 int cg_path_get_machine_name(const char *path
, char **machine
) {
1460 _cleanup_free_
char *u
= NULL
;
1464 r
= cg_path_get_unit(path
, &u
);
1468 sl
= strjoina("/run/systemd/machines/unit:", u
);
1469 return readlink_malloc(sl
, machine
);
1472 int cg_pid_get_machine_name(pid_t pid
, char **machine
) {
1473 _cleanup_free_
char *cgroup
= NULL
;
1478 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1482 return cg_path_get_machine_name(cgroup
, machine
);
1485 int cg_path_get_session(const char *path
, char **session
) {
1486 _cleanup_free_
char *unit
= NULL
;
1492 r
= cg_path_get_unit(path
, &unit
);
1496 start
= startswith(unit
, "session-");
1499 end
= endswith(start
, ".scope");
1504 if (!session_id_valid(start
))
1520 int cg_pid_get_session(pid_t pid
, char **session
) {
1521 _cleanup_free_
char *cgroup
= NULL
;
1524 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1528 return cg_path_get_session(cgroup
, session
);
1531 int cg_path_get_owner_uid(const char *path
, uid_t
*uid
) {
1532 _cleanup_free_
char *slice
= NULL
;
1538 r
= cg_path_get_slice(path
, &slice
);
1542 start
= startswith(slice
, "user-");
1545 end
= endswith(start
, ".slice");
1550 if (parse_uid(start
, uid
) < 0)
1556 int cg_pid_get_owner_uid(pid_t pid
, uid_t
*uid
) {
1557 _cleanup_free_
char *cgroup
= NULL
;
1560 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1564 return cg_path_get_owner_uid(cgroup
, uid
);
1567 int cg_path_get_slice(const char *p
, char **slice
) {
1568 const char *e
= NULL
;
1573 /* Finds the right-most slice unit from the beginning, but
1574 * stops before we come to the first non-slice unit. */
1579 p
+= strspn(p
, "/");
1581 n
= strcspn(p
, "/");
1582 if (!valid_slice_name(p
, n
)) {
1587 s
= strdup("-.slice");
1595 return cg_path_decode_unit(e
, slice
);
1603 int cg_pid_get_slice(pid_t pid
, char **slice
) {
1604 _cleanup_free_
char *cgroup
= NULL
;
1609 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1613 return cg_path_get_slice(cgroup
, slice
);
1616 int cg_path_get_user_slice(const char *p
, char **slice
) {
1621 t
= skip_user_prefix(p
);
1625 /* And now it looks pretty much the same as for a system
1626 * slice, so let's just use the same parser from here on. */
1627 return cg_path_get_slice(t
, slice
);
1630 int cg_pid_get_user_slice(pid_t pid
, char **slice
) {
1631 _cleanup_free_
char *cgroup
= NULL
;
1636 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1640 return cg_path_get_user_slice(cgroup
, slice
);
1643 char *cg_escape(const char *p
) {
1644 bool need_prefix
= false;
1646 /* This implements very minimal escaping for names to be used
1647 * as file names in the cgroup tree: any name which might
1648 * conflict with a kernel name or is prefixed with '_' is
1649 * prefixed with a '_'. That way, when reading cgroup names it
1650 * is sufficient to remove a single prefixing underscore if
1653 /* The return value of this function (unlike cg_unescape())
1659 streq(p
, "notify_on_release") ||
1660 streq(p
, "release_agent") ||
1661 streq(p
, "tasks") ||
1662 startswith(p
, "cgroup."))
1667 dot
= strrchr(p
, '.');
1672 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1675 n
= cgroup_controller_to_string(c
);
1680 if (memcmp(p
, n
, l
) != 0)
1690 return strappend("_", p
);
1695 char *cg_unescape(const char *p
) {
1698 /* The return value of this function (unlike cg_escape())
1699 * doesn't need free()! */
1707 #define CONTROLLER_VALID \
1711 bool cg_controller_is_valid(const char *p
) {
1717 s
= startswith(p
, "name=");
1721 if (*p
== 0 || *p
== '_')
1724 for (t
= p
; *t
; t
++)
1725 if (!strchr(CONTROLLER_VALID
, *t
))
1728 if (t
- p
> FILENAME_MAX
)
1734 int cg_slice_to_path(const char *unit
, char **ret
) {
1735 _cleanup_free_
char *p
= NULL
, *s
= NULL
, *e
= NULL
;
1742 if (streq(unit
, "-.slice")) {
1752 if (!unit_name_is_valid(unit
, UNIT_NAME_PLAIN
))
1755 if (!endswith(unit
, ".slice"))
1758 r
= unit_name_to_prefix(unit
, &p
);
1762 dash
= strchr(p
, '-');
1764 /* Don't allow initial dashes */
1769 _cleanup_free_
char *escaped
= NULL
;
1770 char n
[dash
- p
+ sizeof(".slice")];
1772 /* Don't allow trailing or double dashes */
1773 if (dash
[1] == 0 || dash
[1] == '-')
1776 strcpy(stpncpy(n
, p
, dash
- p
), ".slice");
1777 if (!unit_name_is_valid(n
, UNIT_NAME_PLAIN
))
1780 escaped
= cg_escape(n
);
1784 if (!strextend(&s
, escaped
, "/", NULL
))
1787 dash
= strchr(dash
+1, '-');
1790 e
= cg_escape(unit
);
1794 if (!strextend(&s
, e
, NULL
))
1803 int cg_set_attribute(const char *controller
, const char *path
, const char *attribute
, const char *value
) {
1804 _cleanup_free_
char *p
= NULL
;
1807 r
= cg_get_path(controller
, path
, attribute
, &p
);
1811 return write_string_file(p
, value
, 0);
1814 int cg_get_attribute(const char *controller
, const char *path
, const char *attribute
, char **ret
) {
1815 _cleanup_free_
char *p
= NULL
;
1818 r
= cg_get_path(controller
, path
, attribute
, &p
);
1822 return read_one_line_file(p
, ret
);
1825 int cg_create_everywhere(CGroupMask supported
, CGroupMask mask
, const char *path
) {
1829 /* This one will create a cgroup in our private tree, but also
1830 * duplicate it in the trees specified in mask, and remove it
1833 /* First create the cgroup in our own hierarchy. */
1834 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER
, path
);
1838 /* If we are in the unified hierarchy, we are done now */
1839 unified
= cg_unified();
1845 /* Otherwise, do the same in the other hierarchies */
1846 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1847 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1850 n
= cgroup_controller_to_string(c
);
1853 (void) cg_create(n
, path
);
1854 else if (supported
& bit
)
1855 (void) cg_trim(n
, path
, true);
1861 int cg_attach_everywhere(CGroupMask supported
, const char *path
, pid_t pid
, cg_migrate_callback_t path_callback
, void *userdata
) {
1865 r
= cg_attach(SYSTEMD_CGROUP_CONTROLLER
, path
, pid
);
1869 unified
= cg_unified();
1875 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1876 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1877 const char *p
= NULL
;
1879 if (!(supported
& bit
))
1883 p
= path_callback(bit
, userdata
);
1888 (void) cg_attach_fallback(cgroup_controller_to_string(c
), p
, pid
);
1894 int cg_attach_many_everywhere(CGroupMask supported
, const char *path
, Set
* pids
, cg_migrate_callback_t path_callback
, void *userdata
) {
1899 SET_FOREACH(pidp
, pids
, i
) {
1900 pid_t pid
= PTR_TO_PID(pidp
);
1903 q
= cg_attach_everywhere(supported
, path
, pid
, path_callback
, userdata
);
1904 if (q
< 0 && r
>= 0)
1911 int cg_migrate_everywhere(CGroupMask supported
, const char *from
, const char *to
, cg_migrate_callback_t to_callback
, void *userdata
) {
1915 if (!path_equal(from
, to
)) {
1916 r
= cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER
, from
, SYSTEMD_CGROUP_CONTROLLER
, to
, false, true);
1921 unified
= cg_unified();
1927 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1928 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1929 const char *p
= NULL
;
1931 if (!(supported
& bit
))
1935 p
= to_callback(bit
, userdata
);
1940 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER
, to
, cgroup_controller_to_string(c
), p
, false, false);
1946 int cg_trim_everywhere(CGroupMask supported
, const char *path
, bool delete_root
) {
1950 r
= cg_trim(SYSTEMD_CGROUP_CONTROLLER
, path
, delete_root
);
1954 unified
= cg_unified();
1960 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1961 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1963 if (!(supported
& bit
))
1966 (void) cg_trim(cgroup_controller_to_string(c
), path
, delete_root
);
1972 int cg_mask_supported(CGroupMask
*ret
) {
1973 CGroupMask mask
= 0;
1976 /* Determines the mask of supported cgroup controllers. Only
1977 * includes controllers we can make sense of and that are
1978 * actually accessible. */
1980 unified
= cg_unified();
1984 _cleanup_free_
char *root
= NULL
, *controllers
= NULL
, *path
= NULL
;
1987 /* In the unified hierarchy we can read the supported
1988 * and accessible controllers from the top-level
1989 * cgroup attribute */
1991 r
= cg_get_root_path(&root
);
1995 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, root
, "cgroup.controllers", &path
);
1999 r
= read_one_line_file(path
, &controllers
);
2005 _cleanup_free_
char *n
= NULL
;
2008 r
= extract_first_word(&c
, &n
, NULL
, 0);
2014 v
= cgroup_controller_from_string(n
);
2018 mask
|= CGROUP_CONTROLLER_TO_MASK(v
);
2021 /* Currently, we only support the memory and pids
2022 * controller in the unified hierarchy, mask
2023 * everything else off. */
2024 mask
&= CGROUP_MASK_MEMORY
| CGROUP_MASK_PIDS
;
2029 /* In the legacy hierarchy, we check which
2030 * hierarchies are mounted. */
2032 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2035 n
= cgroup_controller_to_string(c
);
2036 if (controller_is_accessible(n
) >= 0)
2037 mask
|= CGROUP_CONTROLLER_TO_MASK(c
);
2045 int cg_kernel_controllers(Set
*controllers
) {
2046 _cleanup_fclose_
FILE *f
= NULL
;
2050 assert(controllers
);
2052 /* Determines the full list of kernel-known controllers. Might
2053 * include controllers we don't actually support, arbitrary
2054 * named hierarchies and controllers that aren't currently
2055 * accessible (because not mounted). */
2057 f
= fopen("/proc/cgroups", "re");
2059 if (errno
== ENOENT
)
2064 /* Ignore the header line */
2065 (void) fgets(buf
, sizeof(buf
), f
);
2072 if (fscanf(f
, "%ms %*i %*i %i", &controller
, &enabled
) != 2) {
2077 if (ferror(f
) && errno
!= 0)
2088 if (!cg_controller_is_valid(controller
)) {
2093 r
= set_consume(controllers
, controller
);
2101 static thread_local
int unified_cache
= -1;
2103 int cg_unified(void) {
2106 /* Checks if we support the unified hierarchy. Returns an
2107 * error when the cgroup hierarchies aren't mounted yet or we
2108 * have any other trouble determining if the unified hierarchy
2111 if (unified_cache
>= 0)
2112 return unified_cache
;
2114 if (statfs("/sys/fs/cgroup/", &fs
) < 0)
2117 if (F_TYPE_EQUAL(fs
.f_type
, CGROUP_SUPER_MAGIC
))
2118 unified_cache
= true;
2119 else if (F_TYPE_EQUAL(fs
.f_type
, TMPFS_MAGIC
))
2120 unified_cache
= false;
2124 return unified_cache
;
2127 void cg_unified_flush(void) {
2131 int cg_enable_everywhere(CGroupMask supported
, CGroupMask mask
, const char *p
) {
2132 _cleanup_free_
char *fs
= NULL
;
2141 unified
= cg_unified();
2144 if (!unified
) /* on the legacy hierarchy there's no joining of controllers defined */
2147 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, p
, "cgroup.subtree_control", &fs
);
2151 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2152 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2155 if (!(supported
& bit
))
2158 n
= cgroup_controller_to_string(c
);
2160 char s
[1 + strlen(n
) + 1];
2162 s
[0] = mask
& bit
? '+' : '-';
2165 r
= write_string_file(fs
, s
, 0);
2167 log_debug_errno(r
, "Failed to enable controller %s for %s (%s): %m", n
, p
, fs
);
2174 bool cg_is_unified_wanted(void) {
2175 static thread_local
int wanted
= -1;
2178 /* If the hierarchy is already mounted, then follow whatever
2179 * was chosen for it. */
2180 unified
= cg_unified();
2184 /* Otherwise, let's see what the kernel command line has to
2185 * say. Since checking that is expensive, let's cache the
2190 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL
);
2192 return (wanted
= true);
2194 _cleanup_free_
char *value
= NULL
;
2196 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value
);
2200 return (wanted
= false);
2202 return (wanted
= parse_boolean(value
) > 0);
2206 bool cg_is_legacy_wanted(void) {
2207 return !cg_is_unified_wanted();
2210 static const char *cgroup_controller_table
[_CGROUP_CONTROLLER_MAX
] = {
2211 [CGROUP_CONTROLLER_CPU
] = "cpu",
2212 [CGROUP_CONTROLLER_CPUACCT
] = "cpuacct",
2213 [CGROUP_CONTROLLER_BLKIO
] = "blkio",
2214 [CGROUP_CONTROLLER_MEMORY
] = "memory",
2215 [CGROUP_CONTROLLER_DEVICES
] = "devices",
2216 [CGROUP_CONTROLLER_PIDS
] = "pids",
2219 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller
, CGroupController
);