1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
33 #include "extract-word.h"
36 #include "formats-util.h"
37 #include "login-util.h"
40 #include "parse-util.h"
41 #include "path-util.h"
42 #include "process-util.h"
45 #include "string-util.h"
46 #include "unit-name.h"
47 #include "user-util.h"
50 int cg_enumerate_processes(const char *controller
, const char *path
, FILE **_f
) {
51 _cleanup_free_
char *fs
= NULL
;
57 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
69 int cg_read_pid(FILE *f
, pid_t
*_pid
) {
72 /* Note that the cgroup.procs might contain duplicates! See
73 * cgroups.txt for details. */
79 if (fscanf(f
, "%lu", &ul
) != 1) {
84 return errno
? -errno
: -EIO
;
94 int cg_enumerate_subgroups(const char *controller
, const char *path
, DIR **_d
) {
95 _cleanup_free_
char *fs
= NULL
;
101 /* This is not recursive! */
103 r
= cg_get_path(controller
, path
, NULL
, &fs
);
115 int cg_read_subgroup(DIR *d
, char **fn
) {
121 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
124 if (de
->d_type
!= DT_DIR
)
127 if (streq(de
->d_name
, ".") ||
128 streq(de
->d_name
, ".."))
131 b
= strdup(de
->d_name
);
142 int cg_rmdir(const char *controller
, const char *path
) {
143 _cleanup_free_
char *p
= NULL
;
146 r
= cg_get_path(controller
, path
, NULL
, &p
);
151 if (r
< 0 && errno
!= ENOENT
)
157 int cg_kill(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, Set
*s
) {
158 _cleanup_set_free_ Set
*allocated_set
= NULL
;
165 /* This goes through the tasks list and kills them all. This
166 * is repeated until no further processes are added to the
167 * tasks list, to properly handle forking processes */
170 s
= allocated_set
= set_new(NULL
);
178 _cleanup_fclose_
FILE *f
= NULL
;
182 r
= cg_enumerate_processes(controller
, path
, &f
);
184 if (ret
>= 0 && r
!= -ENOENT
)
190 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
192 if (ignore_self
&& pid
== my_pid
)
195 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
198 /* If we haven't killed this process yet, kill
200 if (kill(pid
, sig
) < 0) {
201 if (ret
>= 0 && errno
!= ESRCH
)
204 if (sigcont
&& sig
!= SIGKILL
)
205 (void) kill(pid
, SIGCONT
);
213 r
= set_put(s
, PID_TO_PTR(pid
));
229 /* To avoid racing against processes which fork
230 * quicker than we can kill them we repeat this until
231 * no new pids need to be killed. */
238 int cg_kill_recursive(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, bool rem
, Set
*s
) {
239 _cleanup_set_free_ Set
*allocated_set
= NULL
;
240 _cleanup_closedir_
DIR *d
= NULL
;
248 s
= allocated_set
= set_new(NULL
);
253 ret
= cg_kill(controller
, path
, sig
, sigcont
, ignore_self
, s
);
255 r
= cg_enumerate_subgroups(controller
, path
, &d
);
257 if (ret
>= 0 && r
!= -ENOENT
)
263 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
264 _cleanup_free_
char *p
= NULL
;
266 p
= strjoin(path
, "/", fn
, NULL
);
271 r
= cg_kill_recursive(controller
, p
, sig
, sigcont
, ignore_self
, rem
, s
);
272 if (r
!= 0 && ret
>= 0)
276 if (ret
>= 0 && r
< 0)
280 r
= cg_rmdir(controller
, path
);
281 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
288 int cg_migrate(const char *cfrom
, const char *pfrom
, const char *cto
, const char *pto
, bool ignore_self
) {
290 _cleanup_set_free_ Set
*s
= NULL
;
306 _cleanup_fclose_
FILE *f
= NULL
;
310 r
= cg_enumerate_processes(cfrom
, pfrom
, &f
);
312 if (ret
>= 0 && r
!= -ENOENT
)
318 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
320 /* This might do weird stuff if we aren't a
321 * single-threaded program. However, we
322 * luckily know we are single-threaded */
323 if (ignore_self
&& pid
== my_pid
)
326 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
329 /* Ignore kernel threads. Since they can only
330 * exist in the root cgroup, we only check for
333 (isempty(pfrom
) || path_equal(pfrom
, "/")) &&
334 is_kernel_thread(pid
) > 0)
337 r
= cg_attach(cto
, pto
, pid
);
339 if (ret
>= 0 && r
!= -ESRCH
)
346 r
= set_put(s
, PID_TO_PTR(pid
));
366 int cg_migrate_recursive(
374 _cleanup_closedir_
DIR *d
= NULL
;
383 ret
= cg_migrate(cfrom
, pfrom
, cto
, pto
, ignore_self
);
385 r
= cg_enumerate_subgroups(cfrom
, pfrom
, &d
);
387 if (ret
>= 0 && r
!= -ENOENT
)
393 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
394 _cleanup_free_
char *p
= NULL
;
396 p
= strjoin(pfrom
, "/", fn
, NULL
);
401 r
= cg_migrate_recursive(cfrom
, p
, cto
, pto
, ignore_self
, rem
);
402 if (r
!= 0 && ret
>= 0)
406 if (r
< 0 && ret
>= 0)
410 r
= cg_rmdir(cfrom
, pfrom
);
411 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
418 int cg_migrate_recursive_fallback(
433 r
= cg_migrate_recursive(cfrom
, pfrom
, cto
, pto
, ignore_self
, rem
);
435 char prefix
[strlen(pto
) + 1];
437 /* This didn't work? Then let's try all prefixes of the destination */
439 PATH_FOREACH_PREFIX(prefix
, pto
) {
442 q
= cg_migrate_recursive(cfrom
, pfrom
, cto
, prefix
, ignore_self
, rem
);
451 static const char *controller_to_dirname(const char *controller
) {
456 /* Converts a controller name to the directory name below
457 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
458 * just cuts off the name= prefix used for named
459 * hierarchies, if it is specified. */
461 e
= startswith(controller
, "name=");
468 static int join_path_legacy(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
475 dn
= controller_to_dirname(controller
);
477 if (isempty(path
) && isempty(suffix
))
478 t
= strappend("/sys/fs/cgroup/", dn
);
479 else if (isempty(path
))
480 t
= strjoin("/sys/fs/cgroup/", dn
, "/", suffix
, NULL
);
481 else if (isempty(suffix
))
482 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, NULL
);
484 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, "/", suffix
, NULL
);
492 static int join_path_unified(const char *path
, const char *suffix
, char **fs
) {
497 if (isempty(path
) && isempty(suffix
))
498 t
= strdup("/sys/fs/cgroup");
499 else if (isempty(path
))
500 t
= strappend("/sys/fs/cgroup/", suffix
);
501 else if (isempty(suffix
))
502 t
= strappend("/sys/fs/cgroup/", path
);
504 t
= strjoin("/sys/fs/cgroup/", path
, "/", suffix
, NULL
);
512 int cg_get_path(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
520 /* If no controller is specified, we return the path
521 * *below* the controllers, without any prefix. */
523 if (!path
&& !suffix
)
531 t
= strjoin(path
, "/", suffix
, NULL
);
535 *fs
= path_kill_slashes(t
);
539 if (!cg_controller_is_valid(controller
))
542 unified
= cg_unified();
547 r
= join_path_unified(path
, suffix
, fs
);
549 r
= join_path_legacy(controller
, path
, suffix
, fs
);
553 path_kill_slashes(*fs
);
557 static int controller_is_accessible(const char *controller
) {
562 /* Checks whether a specific controller is accessible,
563 * i.e. its hierarchy mounted. In the unified hierarchy all
564 * controllers are considered accessible, except for the named
567 if (!cg_controller_is_valid(controller
))
570 unified
= cg_unified();
574 /* We don't support named hierarchies if we are using
575 * the unified hierarchy. */
577 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
))
580 if (startswith(controller
, "name="))
586 dn
= controller_to_dirname(controller
);
587 cc
= strjoina("/sys/fs/cgroup/", dn
);
589 if (laccess(cc
, F_OK
) < 0)
596 int cg_get_path_and_check(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
602 /* Check if the specified controller is actually accessible */
603 r
= controller_is_accessible(controller
);
607 return cg_get_path(controller
, path
, suffix
, fs
);
610 static int trim_cb(const char *path
, const struct stat
*sb
, int typeflag
, struct FTW
*ftwbuf
) {
615 if (typeflag
!= FTW_DP
)
618 if (ftwbuf
->level
< 1)
625 int cg_trim(const char *controller
, const char *path
, bool delete_root
) {
626 _cleanup_free_
char *fs
= NULL
;
631 r
= cg_get_path(controller
, path
, NULL
, &fs
);
636 if (nftw(fs
, trim_cb
, 64, FTW_DEPTH
|FTW_MOUNT
|FTW_PHYS
) != 0) {
646 if (rmdir(fs
) < 0 && errno
!= ENOENT
)
653 int cg_create(const char *controller
, const char *path
) {
654 _cleanup_free_
char *fs
= NULL
;
657 r
= cg_get_path_and_check(controller
, path
, NULL
, &fs
);
661 r
= mkdir_parents(fs
, 0755);
665 if (mkdir(fs
, 0755) < 0) {
676 int cg_create_and_attach(const char *controller
, const char *path
, pid_t pid
) {
681 r
= cg_create(controller
, path
);
685 q
= cg_attach(controller
, path
, pid
);
689 /* This does not remove the cgroup on failure */
693 int cg_attach(const char *controller
, const char *path
, pid_t pid
) {
694 _cleanup_free_
char *fs
= NULL
;
695 char c
[DECIMAL_STR_MAX(pid_t
) + 2];
701 r
= cg_get_path_and_check(controller
, path
, "cgroup.procs", &fs
);
708 snprintf(c
, sizeof(c
), PID_FMT
"\n", pid
);
710 return write_string_file(fs
, c
, 0);
713 int cg_attach_fallback(const char *controller
, const char *path
, pid_t pid
) {
720 r
= cg_attach(controller
, path
, pid
);
722 char prefix
[strlen(path
) + 1];
724 /* This didn't work? Then let's try all prefixes of
727 PATH_FOREACH_PREFIX(prefix
, path
) {
730 q
= cg_attach(controller
, prefix
, pid
);
739 int cg_set_group_access(
740 const char *controller
,
746 _cleanup_free_
char *fs
= NULL
;
749 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
752 if (mode
!= MODE_INVALID
)
755 r
= cg_get_path(controller
, path
, NULL
, &fs
);
759 return chmod_and_chown(fs
, mode
, uid
, gid
);
762 int cg_set_task_access(
763 const char *controller
,
769 _cleanup_free_
char *fs
= NULL
, *procs
= NULL
;
774 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
777 if (mode
!= MODE_INVALID
)
780 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
784 r
= chmod_and_chown(fs
, mode
, uid
, gid
);
788 unified
= cg_unified();
794 /* Compatibility, Always keep values for "tasks" in sync with
796 if (cg_get_path(controller
, path
, "tasks", &procs
) >= 0)
797 (void) chmod_and_chown(procs
, mode
, uid
, gid
);
802 int cg_pid_get_path(const char *controller
, pid_t pid
, char **path
) {
803 _cleanup_fclose_
FILE *f
= NULL
;
812 unified
= cg_unified();
817 if (!cg_controller_is_valid(controller
))
820 controller
= SYSTEMD_CGROUP_CONTROLLER
;
822 cs
= strlen(controller
);
825 fs
= procfs_file_alloca(pid
, "cgroup");
828 return errno
== ENOENT
? -ESRCH
: -errno
;
830 FOREACH_LINE(line
, f
, return -errno
) {
836 e
= startswith(line
, "0:");
846 const char *word
, *state
;
849 l
= strchr(line
, ':');
859 FOREACH_WORD_SEPARATOR(word
, k
, l
, ",", state
) {
860 if (k
== cs
&& memcmp(word
, controller
, cs
) == 0) {
881 int cg_install_release_agent(const char *controller
, const char *agent
) {
882 _cleanup_free_
char *fs
= NULL
, *contents
= NULL
;
888 unified
= cg_unified();
891 if (unified
) /* doesn't apply to unified hierarchy */
894 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
898 r
= read_one_line_file(fs
, &contents
);
902 sc
= strstrip(contents
);
904 r
= write_string_file(fs
, agent
, 0);
907 } else if (!path_equal(sc
, agent
))
911 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
915 contents
= mfree(contents
);
916 r
= read_one_line_file(fs
, &contents
);
920 sc
= strstrip(contents
);
921 if (streq(sc
, "0")) {
922 r
= write_string_file(fs
, "1", 0);
935 int cg_uninstall_release_agent(const char *controller
) {
936 _cleanup_free_
char *fs
= NULL
;
939 unified
= cg_unified();
942 if (unified
) /* Doesn't apply to unified hierarchy */
945 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
949 r
= write_string_file(fs
, "0", 0);
955 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
959 r
= write_string_file(fs
, "", 0);
966 int cg_is_empty(const char *controller
, const char *path
) {
967 _cleanup_fclose_
FILE *f
= NULL
;
973 r
= cg_enumerate_processes(controller
, path
, &f
);
979 r
= cg_read_pid(f
, &pid
);
986 int cg_is_empty_recursive(const char *controller
, const char *path
) {
991 /* The root cgroup is always populated */
992 if (controller
&& (isempty(path
) || path_equal(path
, "/")))
995 unified
= cg_unified();
1000 _cleanup_free_
char *populated
= NULL
, *t
= NULL
;
1002 /* On the unified hierarchy we can check empty state
1003 * via the "cgroup.populated" attribute. */
1005 r
= cg_get_path(controller
, path
, "cgroup.populated", &populated
);
1009 r
= read_one_line_file(populated
, &t
);
1015 return streq(t
, "0");
1017 _cleanup_closedir_
DIR *d
= NULL
;
1020 r
= cg_is_empty(controller
, path
);
1024 r
= cg_enumerate_subgroups(controller
, path
, &d
);
1030 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
1031 _cleanup_free_
char *p
= NULL
;
1033 p
= strjoin(path
, "/", fn
, NULL
);
1038 r
= cg_is_empty_recursive(controller
, p
);
1049 int cg_split_spec(const char *spec
, char **controller
, char **path
) {
1050 char *t
= NULL
, *u
= NULL
;
1056 if (!path_is_safe(spec
))
1064 *path
= path_kill_slashes(t
);
1073 e
= strchr(spec
, ':');
1075 if (!cg_controller_is_valid(spec
))
1092 t
= strndup(spec
, e
-spec
);
1095 if (!cg_controller_is_valid(t
)) {
1109 if (!path_is_safe(u
) ||
1110 !path_is_absolute(u
)) {
1116 path_kill_slashes(u
);
1132 int cg_mangle_path(const char *path
, char **result
) {
1133 _cleanup_free_
char *c
= NULL
, *p
= NULL
;
1140 /* First, check if it already is a filesystem path */
1141 if (path_startswith(path
, "/sys/fs/cgroup")) {
1147 *result
= path_kill_slashes(t
);
1151 /* Otherwise, treat it as cg spec */
1152 r
= cg_split_spec(path
, &c
, &p
);
1156 return cg_get_path(c
?: SYSTEMD_CGROUP_CONTROLLER
, p
?: "/", NULL
, result
);
1159 int cg_get_root_path(char **path
) {
1165 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 1, &p
);
1169 e
= endswith(p
, "/" SPECIAL_INIT_SCOPE
);
1171 e
= endswith(p
, "/" SPECIAL_SYSTEM_SLICE
); /* legacy */
1173 e
= endswith(p
, "/system"); /* even more legacy */
1181 int cg_shift_path(const char *cgroup
, const char *root
, const char **shifted
) {
1182 _cleanup_free_
char *rt
= NULL
;
1190 /* If the root was specified let's use that, otherwise
1191 * let's determine it from PID 1 */
1193 r
= cg_get_root_path(&rt
);
1200 p
= path_startswith(cgroup
, root
);
1201 if (p
&& p
> cgroup
)
1209 int cg_pid_get_path_shifted(pid_t pid
, const char *root
, char **cgroup
) {
1210 _cleanup_free_
char *raw
= NULL
;
1217 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &raw
);
1221 r
= cg_shift_path(raw
, root
, &c
);
1241 int cg_path_decode_unit(const char *cgroup
, char **unit
){
1248 n
= strcspn(cgroup
, "/");
1252 c
= strndupa(cgroup
, n
);
1255 if (!unit_name_is_valid(c
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
1266 static bool valid_slice_name(const char *p
, size_t n
) {
1271 if (n
< strlen("x.slice"))
1274 if (memcmp(p
+ n
- 6, ".slice", 6) == 0) {
1280 c
= cg_unescape(buf
);
1282 return unit_name_is_valid(c
, UNIT_NAME_PLAIN
);
1288 static const char *skip_slices(const char *p
) {
1291 /* Skips over all slice assignments */
1296 p
+= strspn(p
, "/");
1298 n
= strcspn(p
, "/");
1299 if (!valid_slice_name(p
, n
))
1306 int cg_path_get_unit(const char *path
, char **ret
) {
1314 e
= skip_slices(path
);
1316 r
= cg_path_decode_unit(e
, &unit
);
1320 /* We skipped over the slices, don't accept any now */
1321 if (endswith(unit
, ".slice")) {
1330 int cg_pid_get_unit(pid_t pid
, char **unit
) {
1331 _cleanup_free_
char *cgroup
= NULL
;
1336 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1340 return cg_path_get_unit(cgroup
, unit
);
1344 * Skip session-*.scope, but require it to be there.
1346 static const char *skip_session(const char *p
) {
1352 p
+= strspn(p
, "/");
1354 n
= strcspn(p
, "/");
1355 if (n
< strlen("session-x.scope"))
1358 if (memcmp(p
, "session-", 8) == 0 && memcmp(p
+ n
- 6, ".scope", 6) == 0) {
1359 char buf
[n
- 8 - 6 + 1];
1361 memcpy(buf
, p
+ 8, n
- 8 - 6);
1364 /* Note that session scopes never need unescaping,
1365 * since they cannot conflict with the kernel's own
1366 * names, hence we don't need to call cg_unescape()
1369 if (!session_id_valid(buf
))
1373 p
+= strspn(p
, "/");
1381 * Skip user@*.service, but require it to be there.
1383 static const char *skip_user_manager(const char *p
) {
1389 p
+= strspn(p
, "/");
1391 n
= strcspn(p
, "/");
1392 if (n
< strlen("user@x.service"))
1395 if (memcmp(p
, "user@", 5) == 0 && memcmp(p
+ n
- 8, ".service", 8) == 0) {
1396 char buf
[n
- 5 - 8 + 1];
1398 memcpy(buf
, p
+ 5, n
- 5 - 8);
1401 /* Note that user manager services never need unescaping,
1402 * since they cannot conflict with the kernel's own
1403 * names, hence we don't need to call cg_unescape()
1406 if (parse_uid(buf
, NULL
) < 0)
1410 p
+= strspn(p
, "/");
1418 static const char *skip_user_prefix(const char *path
) {
1423 /* Skip slices, if there are any */
1424 e
= skip_slices(path
);
1426 /* Skip the user manager, if it's in the path now... */
1427 t
= skip_user_manager(e
);
1431 /* Alternatively skip the user session if it is in the path... */
1432 return skip_session(e
);
1435 int cg_path_get_user_unit(const char *path
, char **ret
) {
1441 t
= skip_user_prefix(path
);
1445 /* And from here on it looks pretty much the same as for a
1446 * system unit, hence let's use the same parser from here
1448 return cg_path_get_unit(t
, ret
);
1451 int cg_pid_get_user_unit(pid_t pid
, char **unit
) {
1452 _cleanup_free_
char *cgroup
= NULL
;
1457 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1461 return cg_path_get_user_unit(cgroup
, unit
);
1464 int cg_path_get_machine_name(const char *path
, char **machine
) {
1465 _cleanup_free_
char *u
= NULL
;
1469 r
= cg_path_get_unit(path
, &u
);
1473 sl
= strjoina("/run/systemd/machines/unit:", u
);
1474 return readlink_malloc(sl
, machine
);
1477 int cg_pid_get_machine_name(pid_t pid
, char **machine
) {
1478 _cleanup_free_
char *cgroup
= NULL
;
1483 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1487 return cg_path_get_machine_name(cgroup
, machine
);
1490 int cg_path_get_session(const char *path
, char **session
) {
1491 _cleanup_free_
char *unit
= NULL
;
1497 r
= cg_path_get_unit(path
, &unit
);
1501 start
= startswith(unit
, "session-");
1504 end
= endswith(start
, ".scope");
1509 if (!session_id_valid(start
))
1525 int cg_pid_get_session(pid_t pid
, char **session
) {
1526 _cleanup_free_
char *cgroup
= NULL
;
1529 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1533 return cg_path_get_session(cgroup
, session
);
1536 int cg_path_get_owner_uid(const char *path
, uid_t
*uid
) {
1537 _cleanup_free_
char *slice
= NULL
;
1543 r
= cg_path_get_slice(path
, &slice
);
1547 start
= startswith(slice
, "user-");
1550 end
= endswith(start
, ".slice");
1555 if (parse_uid(start
, uid
) < 0)
1561 int cg_pid_get_owner_uid(pid_t pid
, uid_t
*uid
) {
1562 _cleanup_free_
char *cgroup
= NULL
;
1565 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1569 return cg_path_get_owner_uid(cgroup
, uid
);
1572 int cg_path_get_slice(const char *p
, char **slice
) {
1573 const char *e
= NULL
;
1578 /* Finds the right-most slice unit from the beginning, but
1579 * stops before we come to the first non-slice unit. */
1584 p
+= strspn(p
, "/");
1586 n
= strcspn(p
, "/");
1587 if (!valid_slice_name(p
, n
)) {
1592 s
= strdup("-.slice");
1600 return cg_path_decode_unit(e
, slice
);
1608 int cg_pid_get_slice(pid_t pid
, char **slice
) {
1609 _cleanup_free_
char *cgroup
= NULL
;
1614 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1618 return cg_path_get_slice(cgroup
, slice
);
1621 int cg_path_get_user_slice(const char *p
, char **slice
) {
1626 t
= skip_user_prefix(p
);
1630 /* And now it looks pretty much the same as for a system
1631 * slice, so let's just use the same parser from here on. */
1632 return cg_path_get_slice(t
, slice
);
1635 int cg_pid_get_user_slice(pid_t pid
, char **slice
) {
1636 _cleanup_free_
char *cgroup
= NULL
;
1641 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1645 return cg_path_get_user_slice(cgroup
, slice
);
1648 char *cg_escape(const char *p
) {
1649 bool need_prefix
= false;
1651 /* This implements very minimal escaping for names to be used
1652 * as file names in the cgroup tree: any name which might
1653 * conflict with a kernel name or is prefixed with '_' is
1654 * prefixed with a '_'. That way, when reading cgroup names it
1655 * is sufficient to remove a single prefixing underscore if
1658 /* The return value of this function (unlike cg_unescape())
1664 streq(p
, "notify_on_release") ||
1665 streq(p
, "release_agent") ||
1666 streq(p
, "tasks") ||
1667 startswith(p
, "cgroup."))
1672 dot
= strrchr(p
, '.');
1677 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1680 n
= cgroup_controller_to_string(c
);
1685 if (memcmp(p
, n
, l
) != 0)
1695 return strappend("_", p
);
1700 char *cg_unescape(const char *p
) {
1703 /* The return value of this function (unlike cg_escape())
1704 * doesn't need free()! */
1712 #define CONTROLLER_VALID \
1716 bool cg_controller_is_valid(const char *p
) {
1722 s
= startswith(p
, "name=");
1726 if (*p
== 0 || *p
== '_')
1729 for (t
= p
; *t
; t
++)
1730 if (!strchr(CONTROLLER_VALID
, *t
))
1733 if (t
- p
> FILENAME_MAX
)
1739 int cg_slice_to_path(const char *unit
, char **ret
) {
1740 _cleanup_free_
char *p
= NULL
, *s
= NULL
, *e
= NULL
;
1747 if (streq(unit
, "-.slice")) {
1757 if (!unit_name_is_valid(unit
, UNIT_NAME_PLAIN
))
1760 if (!endswith(unit
, ".slice"))
1763 r
= unit_name_to_prefix(unit
, &p
);
1767 dash
= strchr(p
, '-');
1769 /* Don't allow initial dashes */
1774 _cleanup_free_
char *escaped
= NULL
;
1775 char n
[dash
- p
+ sizeof(".slice")];
1777 /* Don't allow trailing or double dashes */
1778 if (dash
[1] == 0 || dash
[1] == '-')
1781 strcpy(stpncpy(n
, p
, dash
- p
), ".slice");
1782 if (!unit_name_is_valid(n
, UNIT_NAME_PLAIN
))
1785 escaped
= cg_escape(n
);
1789 if (!strextend(&s
, escaped
, "/", NULL
))
1792 dash
= strchr(dash
+1, '-');
1795 e
= cg_escape(unit
);
1799 if (!strextend(&s
, e
, NULL
))
1808 int cg_set_attribute(const char *controller
, const char *path
, const char *attribute
, const char *value
) {
1809 _cleanup_free_
char *p
= NULL
;
1812 r
= cg_get_path(controller
, path
, attribute
, &p
);
1816 return write_string_file(p
, value
, 0);
1819 int cg_get_attribute(const char *controller
, const char *path
, const char *attribute
, char **ret
) {
1820 _cleanup_free_
char *p
= NULL
;
1823 r
= cg_get_path(controller
, path
, attribute
, &p
);
1827 return read_one_line_file(p
, ret
);
1830 int cg_create_everywhere(CGroupMask supported
, CGroupMask mask
, const char *path
) {
1834 /* This one will create a cgroup in our private tree, but also
1835 * duplicate it in the trees specified in mask, and remove it
1838 /* First create the cgroup in our own hierarchy. */
1839 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER
, path
);
1843 /* If we are in the unified hierarchy, we are done now */
1844 unified
= cg_unified();
1850 /* Otherwise, do the same in the other hierarchies */
1851 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1852 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1855 n
= cgroup_controller_to_string(c
);
1858 (void) cg_create(n
, path
);
1859 else if (supported
& bit
)
1860 (void) cg_trim(n
, path
, true);
1866 int cg_attach_everywhere(CGroupMask supported
, const char *path
, pid_t pid
, cg_migrate_callback_t path_callback
, void *userdata
) {
1870 r
= cg_attach(SYSTEMD_CGROUP_CONTROLLER
, path
, pid
);
1874 unified
= cg_unified();
1880 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1881 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1882 const char *p
= NULL
;
1884 if (!(supported
& bit
))
1888 p
= path_callback(bit
, userdata
);
1893 (void) cg_attach_fallback(cgroup_controller_to_string(c
), p
, pid
);
1899 int cg_attach_many_everywhere(CGroupMask supported
, const char *path
, Set
* pids
, cg_migrate_callback_t path_callback
, void *userdata
) {
1904 SET_FOREACH(pidp
, pids
, i
) {
1905 pid_t pid
= PTR_TO_PID(pidp
);
1908 q
= cg_attach_everywhere(supported
, path
, pid
, path_callback
, userdata
);
1909 if (q
< 0 && r
>= 0)
1916 int cg_migrate_everywhere(CGroupMask supported
, const char *from
, const char *to
, cg_migrate_callback_t to_callback
, void *userdata
) {
1920 if (!path_equal(from
, to
)) {
1921 r
= cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER
, from
, SYSTEMD_CGROUP_CONTROLLER
, to
, false, true);
1926 unified
= cg_unified();
1932 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1933 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1934 const char *p
= NULL
;
1936 if (!(supported
& bit
))
1940 p
= to_callback(bit
, userdata
);
1945 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER
, to
, cgroup_controller_to_string(c
), p
, false, false);
1951 int cg_trim_everywhere(CGroupMask supported
, const char *path
, bool delete_root
) {
1955 r
= cg_trim(SYSTEMD_CGROUP_CONTROLLER
, path
, delete_root
);
1959 unified
= cg_unified();
1965 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1966 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1968 if (!(supported
& bit
))
1971 (void) cg_trim(cgroup_controller_to_string(c
), path
, delete_root
);
1977 int cg_mask_supported(CGroupMask
*ret
) {
1978 CGroupMask mask
= 0;
1981 /* Determines the mask of supported cgroup controllers. Only
1982 * includes controllers we can make sense of and that are
1983 * actually accessible. */
1985 unified
= cg_unified();
1989 _cleanup_free_
char *root
= NULL
, *controllers
= NULL
, *path
= NULL
;
1992 /* In the unified hierarchy we can read the supported
1993 * and accessible controllers from the top-level
1994 * cgroup attribute */
1996 r
= cg_get_root_path(&root
);
2000 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, root
, "cgroup.controllers", &path
);
2004 r
= read_one_line_file(path
, &controllers
);
2010 _cleanup_free_
char *n
= NULL
;
2013 r
= extract_first_word(&c
, &n
, NULL
, 0);
2019 v
= cgroup_controller_from_string(n
);
2023 mask
|= CGROUP_CONTROLLER_TO_MASK(v
);
2026 /* Currently, we only support the memory and pids
2027 * controller in the unified hierarchy, mask
2028 * everything else off. */
2029 mask
&= CGROUP_MASK_MEMORY
| CGROUP_MASK_PIDS
;
2034 /* In the legacy hierarchy, we check which
2035 * hierarchies are mounted. */
2037 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2040 n
= cgroup_controller_to_string(c
);
2041 if (controller_is_accessible(n
) >= 0)
2042 mask
|= CGROUP_CONTROLLER_TO_MASK(c
);
2050 int cg_kernel_controllers(Set
*controllers
) {
2051 _cleanup_fclose_
FILE *f
= NULL
;
2055 assert(controllers
);
2057 /* Determines the full list of kernel-known controllers. Might
2058 * include controllers we don't actually support, arbitrary
2059 * named hierarchies and controllers that aren't currently
2060 * accessible (because not mounted). */
2062 f
= fopen("/proc/cgroups", "re");
2064 if (errno
== ENOENT
)
2069 /* Ignore the header line */
2070 (void) fgets(buf
, sizeof(buf
), f
);
2077 if (fscanf(f
, "%ms %*i %*i %i", &controller
, &enabled
) != 2) {
2082 if (ferror(f
) && errno
!= 0)
2093 if (!cg_controller_is_valid(controller
)) {
2098 r
= set_consume(controllers
, controller
);
2106 static thread_local
int unified_cache
= -1;
2108 int cg_unified(void) {
2111 /* Checks if we support the unified hierarchy. Returns an
2112 * error when the cgroup hierarchies aren't mounted yet or we
2113 * have any other trouble determining if the unified hierarchy
2116 if (unified_cache
>= 0)
2117 return unified_cache
;
2119 if (statfs("/sys/fs/cgroup/", &fs
) < 0)
2122 if (F_TYPE_EQUAL(fs
.f_type
, CGROUP_SUPER_MAGIC
))
2123 unified_cache
= true;
2124 else if (F_TYPE_EQUAL(fs
.f_type
, TMPFS_MAGIC
))
2125 unified_cache
= false;
2129 return unified_cache
;
2132 void cg_unified_flush(void) {
2136 int cg_enable_everywhere(CGroupMask supported
, CGroupMask mask
, const char *p
) {
2137 _cleanup_free_
char *fs
= NULL
;
2146 unified
= cg_unified();
2149 if (!unified
) /* on the legacy hierarchy there's no joining of controllers defined */
2152 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, p
, "cgroup.subtree_control", &fs
);
2156 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2157 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2160 if (!(supported
& bit
))
2163 n
= cgroup_controller_to_string(c
);
2165 char s
[1 + strlen(n
) + 1];
2167 s
[0] = mask
& bit
? '+' : '-';
2170 r
= write_string_file(fs
, s
, 0);
2172 log_debug_errno(r
, "Failed to enable controller %s for %s (%s): %m", n
, p
, fs
);
2179 bool cg_is_unified_wanted(void) {
2180 static thread_local
int wanted
= -1;
2183 /* If the hierarchy is already mounted, then follow whatever
2184 * was chosen for it. */
2185 unified
= cg_unified();
2189 /* Otherwise, let's see what the kernel command line has to
2190 * say. Since checking that is expensive, let's cache the
2195 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL
);
2197 return (wanted
= true);
2199 _cleanup_free_
char *value
= NULL
;
2201 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value
);
2205 return (wanted
= false);
2207 return (wanted
= parse_boolean(value
) > 0);
2211 bool cg_is_legacy_wanted(void) {
2212 return !cg_is_unified_wanted();
2215 int cg_cpu_shares_parse(const char *s
, uint64_t *ret
) {
2220 *ret
= CGROUP_CPU_SHARES_INVALID
;
2224 r
= safe_atou64(s
, &u
);
2228 if (u
< CGROUP_CPU_SHARES_MIN
|| u
> CGROUP_CPU_SHARES_MAX
)
2235 int cg_blkio_weight_parse(const char *s
, uint64_t *ret
) {
2240 *ret
= CGROUP_BLKIO_WEIGHT_INVALID
;
2244 r
= safe_atou64(s
, &u
);
2248 if (u
< CGROUP_BLKIO_WEIGHT_MIN
|| u
> CGROUP_BLKIO_WEIGHT_MAX
)
2255 static const char *cgroup_controller_table
[_CGROUP_CONTROLLER_MAX
] = {
2256 [CGROUP_CONTROLLER_CPU
] = "cpu",
2257 [CGROUP_CONTROLLER_CPUACCT
] = "cpuacct",
2258 [CGROUP_CONTROLLER_BLKIO
] = "blkio",
2259 [CGROUP_CONTROLLER_MEMORY
] = "memory",
2260 [CGROUP_CONTROLLER_DEVICES
] = "devices",
2261 [CGROUP_CONTROLLER_PIDS
] = "pids",
2262 [CGROUP_CONTROLLER_NET_CLS
] = "net_cls",
2265 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller
, CGroupController
);