1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
33 #include "extract-word.h"
36 #include "formats-util.h"
37 #include "login-util.h"
40 #include "path-util.h"
41 #include "process-util.h"
44 #include "string-util.h"
45 #include "unit-name.h"
48 int cg_enumerate_processes(const char *controller
, const char *path
, FILE **_f
) {
49 _cleanup_free_
char *fs
= NULL
;
55 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
67 int cg_read_pid(FILE *f
, pid_t
*_pid
) {
70 /* Note that the cgroup.procs might contain duplicates! See
71 * cgroups.txt for details. */
77 if (fscanf(f
, "%lu", &ul
) != 1) {
82 return errno
? -errno
: -EIO
;
92 int cg_enumerate_subgroups(const char *controller
, const char *path
, DIR **_d
) {
93 _cleanup_free_
char *fs
= NULL
;
99 /* This is not recursive! */
101 r
= cg_get_path(controller
, path
, NULL
, &fs
);
113 int cg_read_subgroup(DIR *d
, char **fn
) {
119 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
122 if (de
->d_type
!= DT_DIR
)
125 if (streq(de
->d_name
, ".") ||
126 streq(de
->d_name
, ".."))
129 b
= strdup(de
->d_name
);
140 int cg_rmdir(const char *controller
, const char *path
) {
141 _cleanup_free_
char *p
= NULL
;
144 r
= cg_get_path(controller
, path
, NULL
, &p
);
149 if (r
< 0 && errno
!= ENOENT
)
155 int cg_kill(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, Set
*s
) {
156 _cleanup_set_free_ Set
*allocated_set
= NULL
;
163 /* This goes through the tasks list and kills them all. This
164 * is repeated until no further processes are added to the
165 * tasks list, to properly handle forking processes */
168 s
= allocated_set
= set_new(NULL
);
176 _cleanup_fclose_
FILE *f
= NULL
;
180 r
= cg_enumerate_processes(controller
, path
, &f
);
182 if (ret
>= 0 && r
!= -ENOENT
)
188 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
190 if (ignore_self
&& pid
== my_pid
)
193 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
196 /* If we haven't killed this process yet, kill
198 if (kill(pid
, sig
) < 0) {
199 if (ret
>= 0 && errno
!= ESRCH
)
202 if (sigcont
&& sig
!= SIGKILL
)
203 (void) kill(pid
, SIGCONT
);
211 r
= set_put(s
, PID_TO_PTR(pid
));
227 /* To avoid racing against processes which fork
228 * quicker than we can kill them we repeat this until
229 * no new pids need to be killed. */
236 int cg_kill_recursive(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, bool rem
, Set
*s
) {
237 _cleanup_set_free_ Set
*allocated_set
= NULL
;
238 _cleanup_closedir_
DIR *d
= NULL
;
246 s
= allocated_set
= set_new(NULL
);
251 ret
= cg_kill(controller
, path
, sig
, sigcont
, ignore_self
, s
);
253 r
= cg_enumerate_subgroups(controller
, path
, &d
);
255 if (ret
>= 0 && r
!= -ENOENT
)
261 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
262 _cleanup_free_
char *p
= NULL
;
264 p
= strjoin(path
, "/", fn
, NULL
);
269 r
= cg_kill_recursive(controller
, p
, sig
, sigcont
, ignore_self
, rem
, s
);
270 if (r
!= 0 && ret
>= 0)
274 if (ret
>= 0 && r
< 0)
278 r
= cg_rmdir(controller
, path
);
279 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
286 int cg_migrate(const char *cfrom
, const char *pfrom
, const char *cto
, const char *pto
, bool ignore_self
) {
288 _cleanup_set_free_ Set
*s
= NULL
;
304 _cleanup_fclose_
FILE *f
= NULL
;
308 r
= cg_enumerate_processes(cfrom
, pfrom
, &f
);
310 if (ret
>= 0 && r
!= -ENOENT
)
316 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
318 /* This might do weird stuff if we aren't a
319 * single-threaded program. However, we
320 * luckily know we are not */
321 if (ignore_self
&& pid
== my_pid
)
324 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
327 /* Ignore kernel threads. Since they can only
328 * exist in the root cgroup, we only check for
331 (isempty(pfrom
) || path_equal(pfrom
, "/")) &&
332 is_kernel_thread(pid
) > 0)
335 r
= cg_attach(cto
, pto
, pid
);
337 if (ret
>= 0 && r
!= -ESRCH
)
344 r
= set_put(s
, PID_TO_PTR(pid
));
364 int cg_migrate_recursive(
372 _cleanup_closedir_
DIR *d
= NULL
;
381 ret
= cg_migrate(cfrom
, pfrom
, cto
, pto
, ignore_self
);
383 r
= cg_enumerate_subgroups(cfrom
, pfrom
, &d
);
385 if (ret
>= 0 && r
!= -ENOENT
)
391 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
392 _cleanup_free_
char *p
= NULL
;
394 p
= strjoin(pfrom
, "/", fn
, NULL
);
399 r
= cg_migrate_recursive(cfrom
, p
, cto
, pto
, ignore_self
, rem
);
400 if (r
!= 0 && ret
>= 0)
404 if (r
< 0 && ret
>= 0)
408 r
= cg_rmdir(cfrom
, pfrom
);
409 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
416 int cg_migrate_recursive_fallback(
431 r
= cg_migrate_recursive(cfrom
, pfrom
, cto
, pto
, ignore_self
, rem
);
433 char prefix
[strlen(pto
) + 1];
435 /* This didn't work? Then let's try all prefixes of the destination */
437 PATH_FOREACH_PREFIX(prefix
, pto
) {
440 q
= cg_migrate_recursive(cfrom
, pfrom
, cto
, prefix
, ignore_self
, rem
);
449 static const char *controller_to_dirname(const char *controller
) {
454 /* Converts a controller name to the directory name below
455 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
456 * just cuts off the name= prefixed used for named
457 * hierarchies, if it is specified. */
459 e
= startswith(controller
, "name=");
466 static int join_path_legacy(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
473 dn
= controller_to_dirname(controller
);
475 if (isempty(path
) && isempty(suffix
))
476 t
= strappend("/sys/fs/cgroup/", dn
);
477 else if (isempty(path
))
478 t
= strjoin("/sys/fs/cgroup/", dn
, "/", suffix
, NULL
);
479 else if (isempty(suffix
))
480 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, NULL
);
482 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, "/", suffix
, NULL
);
490 static int join_path_unified(const char *path
, const char *suffix
, char **fs
) {
495 if (isempty(path
) && isempty(suffix
))
496 t
= strdup("/sys/fs/cgroup");
497 else if (isempty(path
))
498 t
= strappend("/sys/fs/cgroup/", suffix
);
499 else if (isempty(suffix
))
500 t
= strappend("/sys/fs/cgroup/", path
);
502 t
= strjoin("/sys/fs/cgroup/", path
, "/", suffix
, NULL
);
510 int cg_get_path(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
518 /* If no controller is specified, we return the path
519 * *below* the controllers, without any prefix. */
521 if (!path
&& !suffix
)
529 t
= strjoin(path
, "/", suffix
, NULL
);
533 *fs
= path_kill_slashes(t
);
537 if (!cg_controller_is_valid(controller
))
540 unified
= cg_unified();
545 r
= join_path_unified(path
, suffix
, fs
);
547 r
= join_path_legacy(controller
, path
, suffix
, fs
);
551 path_kill_slashes(*fs
);
555 static int controller_is_accessible(const char *controller
) {
560 /* Checks whether a specific controller is accessible,
561 * i.e. its hierarchy mounted. In the unified hierarchy all
562 * controllers are considered accessible, except for the named
565 if (!cg_controller_is_valid(controller
))
568 unified
= cg_unified();
572 /* We don't support named hierarchies if we are using
573 * the unified hierarchy. */
575 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
))
578 if (startswith(controller
, "name="))
584 dn
= controller_to_dirname(controller
);
585 cc
= strjoina("/sys/fs/cgroup/", dn
);
587 if (laccess(cc
, F_OK
) < 0)
594 int cg_get_path_and_check(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
600 /* Check if the specified controller is actually accessible */
601 r
= controller_is_accessible(controller
);
605 return cg_get_path(controller
, path
, suffix
, fs
);
608 static int trim_cb(const char *path
, const struct stat
*sb
, int typeflag
, struct FTW
*ftwbuf
) {
613 if (typeflag
!= FTW_DP
)
616 if (ftwbuf
->level
< 1)
623 int cg_trim(const char *controller
, const char *path
, bool delete_root
) {
624 _cleanup_free_
char *fs
= NULL
;
629 r
= cg_get_path(controller
, path
, NULL
, &fs
);
634 if (nftw(fs
, trim_cb
, 64, FTW_DEPTH
|FTW_MOUNT
|FTW_PHYS
) != 0) {
644 if (rmdir(fs
) < 0 && errno
!= ENOENT
)
651 int cg_create(const char *controller
, const char *path
) {
652 _cleanup_free_
char *fs
= NULL
;
655 r
= cg_get_path_and_check(controller
, path
, NULL
, &fs
);
659 r
= mkdir_parents(fs
, 0755);
663 if (mkdir(fs
, 0755) < 0) {
674 int cg_create_and_attach(const char *controller
, const char *path
, pid_t pid
) {
679 r
= cg_create(controller
, path
);
683 q
= cg_attach(controller
, path
, pid
);
687 /* This does not remove the cgroup on failure */
691 int cg_attach(const char *controller
, const char *path
, pid_t pid
) {
692 _cleanup_free_
char *fs
= NULL
;
693 char c
[DECIMAL_STR_MAX(pid_t
) + 2];
699 r
= cg_get_path_and_check(controller
, path
, "cgroup.procs", &fs
);
706 snprintf(c
, sizeof(c
), PID_FMT
"\n", pid
);
708 return write_string_file(fs
, c
, 0);
711 int cg_attach_fallback(const char *controller
, const char *path
, pid_t pid
) {
718 r
= cg_attach(controller
, path
, pid
);
720 char prefix
[strlen(path
) + 1];
722 /* This didn't work? Then let's try all prefixes of
725 PATH_FOREACH_PREFIX(prefix
, path
) {
728 q
= cg_attach(controller
, prefix
, pid
);
737 int cg_set_group_access(
738 const char *controller
,
744 _cleanup_free_
char *fs
= NULL
;
747 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
750 if (mode
!= MODE_INVALID
)
753 r
= cg_get_path(controller
, path
, NULL
, &fs
);
757 return chmod_and_chown(fs
, mode
, uid
, gid
);
760 int cg_set_task_access(
761 const char *controller
,
767 _cleanup_free_
char *fs
= NULL
, *procs
= NULL
;
772 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
775 if (mode
!= MODE_INVALID
)
778 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
782 r
= chmod_and_chown(fs
, mode
, uid
, gid
);
786 unified
= cg_unified();
792 /* Compatibility, Always keep values for "tasks" in sync with
794 if (cg_get_path(controller
, path
, "tasks", &procs
) >= 0)
795 (void) chmod_and_chown(procs
, mode
, uid
, gid
);
800 int cg_pid_get_path(const char *controller
, pid_t pid
, char **path
) {
801 _cleanup_fclose_
FILE *f
= NULL
;
810 unified
= cg_unified();
815 if (!cg_controller_is_valid(controller
))
818 controller
= SYSTEMD_CGROUP_CONTROLLER
;
820 cs
= strlen(controller
);
823 fs
= procfs_file_alloca(pid
, "cgroup");
826 return errno
== ENOENT
? -ESRCH
: -errno
;
828 FOREACH_LINE(line
, f
, return -errno
) {
834 e
= startswith(line
, "0:");
844 const char *word
, *state
;
847 l
= strchr(line
, ':');
857 FOREACH_WORD_SEPARATOR(word
, k
, l
, ",", state
) {
858 if (k
== cs
&& memcmp(word
, controller
, cs
) == 0) {
879 int cg_install_release_agent(const char *controller
, const char *agent
) {
880 _cleanup_free_
char *fs
= NULL
, *contents
= NULL
;
886 unified
= cg_unified();
889 if (unified
) /* doesn't apply to unified hierarchy */
892 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
896 r
= read_one_line_file(fs
, &contents
);
900 sc
= strstrip(contents
);
902 r
= write_string_file(fs
, agent
, 0);
905 } else if (!path_equal(sc
, agent
))
909 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
913 contents
= mfree(contents
);
914 r
= read_one_line_file(fs
, &contents
);
918 sc
= strstrip(contents
);
919 if (streq(sc
, "0")) {
920 r
= write_string_file(fs
, "1", 0);
933 int cg_uninstall_release_agent(const char *controller
) {
934 _cleanup_free_
char *fs
= NULL
;
937 unified
= cg_unified();
940 if (unified
) /* Doesn't apply to unified hierarchy */
943 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
947 r
= write_string_file(fs
, "0", 0);
953 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
957 r
= write_string_file(fs
, "", 0);
964 int cg_is_empty(const char *controller
, const char *path
) {
965 _cleanup_fclose_
FILE *f
= NULL
;
971 r
= cg_enumerate_processes(controller
, path
, &f
);
977 r
= cg_read_pid(f
, &pid
);
984 int cg_is_empty_recursive(const char *controller
, const char *path
) {
989 /* The root cgroup is always populated */
990 if (controller
&& (isempty(path
) || path_equal(path
, "/")))
993 unified
= cg_unified();
998 _cleanup_free_
char *populated
= NULL
, *t
= NULL
;
1000 /* On the unified hierarchy we can check empty state
1001 * via the "cgroup.populated" attribute. */
1003 r
= cg_get_path(controller
, path
, "cgroup.populated", &populated
);
1007 r
= read_one_line_file(populated
, &t
);
1013 return streq(t
, "0");
1015 _cleanup_closedir_
DIR *d
= NULL
;
1018 r
= cg_is_empty(controller
, path
);
1022 r
= cg_enumerate_subgroups(controller
, path
, &d
);
1028 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
1029 _cleanup_free_
char *p
= NULL
;
1031 p
= strjoin(path
, "/", fn
, NULL
);
1036 r
= cg_is_empty_recursive(controller
, p
);
1047 int cg_split_spec(const char *spec
, char **controller
, char **path
) {
1048 char *t
= NULL
, *u
= NULL
;
1054 if (!path_is_safe(spec
))
1062 *path
= path_kill_slashes(t
);
1071 e
= strchr(spec
, ':');
1073 if (!cg_controller_is_valid(spec
))
1090 t
= strndup(spec
, e
-spec
);
1093 if (!cg_controller_is_valid(t
)) {
1107 if (!path_is_safe(u
) ||
1108 !path_is_absolute(u
)) {
1114 path_kill_slashes(u
);
1130 int cg_mangle_path(const char *path
, char **result
) {
1131 _cleanup_free_
char *c
= NULL
, *p
= NULL
;
1138 /* First, check if it already is a filesystem path */
1139 if (path_startswith(path
, "/sys/fs/cgroup")) {
1145 *result
= path_kill_slashes(t
);
1149 /* Otherwise, treat it as cg spec */
1150 r
= cg_split_spec(path
, &c
, &p
);
1154 return cg_get_path(c
?: SYSTEMD_CGROUP_CONTROLLER
, p
?: "/", NULL
, result
);
1157 int cg_get_root_path(char **path
) {
1163 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 1, &p
);
1167 e
= endswith(p
, "/" SPECIAL_INIT_SCOPE
);
1169 e
= endswith(p
, "/" SPECIAL_SYSTEM_SLICE
); /* legacy */
1171 e
= endswith(p
, "/system"); /* even more legacy */
1179 int cg_shift_path(const char *cgroup
, const char *root
, const char **shifted
) {
1180 _cleanup_free_
char *rt
= NULL
;
1188 /* If the root was specified let's use that, otherwise
1189 * let's determine it from PID 1 */
1191 r
= cg_get_root_path(&rt
);
1198 p
= path_startswith(cgroup
, root
);
1199 if (p
&& p
> cgroup
)
1207 int cg_pid_get_path_shifted(pid_t pid
, const char *root
, char **cgroup
) {
1208 _cleanup_free_
char *raw
= NULL
;
1215 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &raw
);
1219 r
= cg_shift_path(raw
, root
, &c
);
1239 int cg_path_decode_unit(const char *cgroup
, char **unit
){
1246 n
= strcspn(cgroup
, "/");
1250 c
= strndupa(cgroup
, n
);
1253 if (!unit_name_is_valid(c
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
1264 static bool valid_slice_name(const char *p
, size_t n
) {
1269 if (n
< strlen("x.slice"))
1272 if (memcmp(p
+ n
- 6, ".slice", 6) == 0) {
1278 c
= cg_unescape(buf
);
1280 return unit_name_is_valid(c
, UNIT_NAME_PLAIN
);
1286 static const char *skip_slices(const char *p
) {
1289 /* Skips over all slice assignments */
1294 p
+= strspn(p
, "/");
1296 n
= strcspn(p
, "/");
1297 if (!valid_slice_name(p
, n
))
1304 int cg_path_get_unit(const char *path
, char **ret
) {
1312 e
= skip_slices(path
);
1314 r
= cg_path_decode_unit(e
, &unit
);
1318 /* We skipped over the slices, don't accept any now */
1319 if (endswith(unit
, ".slice")) {
1328 int cg_pid_get_unit(pid_t pid
, char **unit
) {
1329 _cleanup_free_
char *cgroup
= NULL
;
1334 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1338 return cg_path_get_unit(cgroup
, unit
);
1342 * Skip session-*.scope, but require it to be there.
1344 static const char *skip_session(const char *p
) {
1350 p
+= strspn(p
, "/");
1352 n
= strcspn(p
, "/");
1353 if (n
< strlen("session-x.scope"))
1356 if (memcmp(p
, "session-", 8) == 0 && memcmp(p
+ n
- 6, ".scope", 6) == 0) {
1357 char buf
[n
- 8 - 6 + 1];
1359 memcpy(buf
, p
+ 8, n
- 8 - 6);
1362 /* Note that session scopes never need unescaping,
1363 * since they cannot conflict with the kernel's own
1364 * names, hence we don't need to call cg_unescape()
1367 if (!session_id_valid(buf
))
1371 p
+= strspn(p
, "/");
1379 * Skip user@*.service, but require it to be there.
1381 static const char *skip_user_manager(const char *p
) {
1387 p
+= strspn(p
, "/");
1389 n
= strcspn(p
, "/");
1390 if (n
< strlen("user@x.service"))
1393 if (memcmp(p
, "user@", 5) == 0 && memcmp(p
+ n
- 8, ".service", 8) == 0) {
1394 char buf
[n
- 5 - 8 + 1];
1396 memcpy(buf
, p
+ 5, n
- 5 - 8);
1399 /* Note that user manager services never need unescaping,
1400 * since they cannot conflict with the kernel's own
1401 * names, hence we don't need to call cg_unescape()
1404 if (parse_uid(buf
, NULL
) < 0)
1408 p
+= strspn(p
, "/");
1416 static const char *skip_user_prefix(const char *path
) {
1421 /* Skip slices, if there are any */
1422 e
= skip_slices(path
);
1424 /* Skip the user manager, if it's in the path now... */
1425 t
= skip_user_manager(e
);
1429 /* Alternatively skip the user session if it is in the path... */
1430 return skip_session(e
);
1433 int cg_path_get_user_unit(const char *path
, char **ret
) {
1439 t
= skip_user_prefix(path
);
1443 /* And from here on it looks pretty much the same as for a
1444 * system unit, hence let's use the same parser from here
1446 return cg_path_get_unit(t
, ret
);
1449 int cg_pid_get_user_unit(pid_t pid
, char **unit
) {
1450 _cleanup_free_
char *cgroup
= NULL
;
1455 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1459 return cg_path_get_user_unit(cgroup
, unit
);
1462 int cg_path_get_machine_name(const char *path
, char **machine
) {
1463 _cleanup_free_
char *u
= NULL
;
1467 r
= cg_path_get_unit(path
, &u
);
1471 sl
= strjoina("/run/systemd/machines/unit:", u
);
1472 return readlink_malloc(sl
, machine
);
1475 int cg_pid_get_machine_name(pid_t pid
, char **machine
) {
1476 _cleanup_free_
char *cgroup
= NULL
;
1481 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1485 return cg_path_get_machine_name(cgroup
, machine
);
1488 int cg_path_get_session(const char *path
, char **session
) {
1489 _cleanup_free_
char *unit
= NULL
;
1495 r
= cg_path_get_unit(path
, &unit
);
1499 start
= startswith(unit
, "session-");
1502 end
= endswith(start
, ".scope");
1507 if (!session_id_valid(start
))
1523 int cg_pid_get_session(pid_t pid
, char **session
) {
1524 _cleanup_free_
char *cgroup
= NULL
;
1527 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1531 return cg_path_get_session(cgroup
, session
);
1534 int cg_path_get_owner_uid(const char *path
, uid_t
*uid
) {
1535 _cleanup_free_
char *slice
= NULL
;
1541 r
= cg_path_get_slice(path
, &slice
);
1545 start
= startswith(slice
, "user-");
1548 end
= endswith(start
, ".slice");
1553 if (parse_uid(start
, uid
) < 0)
1559 int cg_pid_get_owner_uid(pid_t pid
, uid_t
*uid
) {
1560 _cleanup_free_
char *cgroup
= NULL
;
1563 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1567 return cg_path_get_owner_uid(cgroup
, uid
);
1570 int cg_path_get_slice(const char *p
, char **slice
) {
1571 const char *e
= NULL
;
1576 /* Finds the right-most slice unit from the beginning, but
1577 * stops before we come to the first non-slice unit. */
1582 p
+= strspn(p
, "/");
1584 n
= strcspn(p
, "/");
1585 if (!valid_slice_name(p
, n
)) {
1590 s
= strdup("-.slice");
1598 return cg_path_decode_unit(e
, slice
);
1606 int cg_pid_get_slice(pid_t pid
, char **slice
) {
1607 _cleanup_free_
char *cgroup
= NULL
;
1612 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1616 return cg_path_get_slice(cgroup
, slice
);
1619 int cg_path_get_user_slice(const char *p
, char **slice
) {
1624 t
= skip_user_prefix(p
);
1628 /* And now it looks pretty much the same as for a system
1629 * slice, so let's just use the same parser from here on. */
1630 return cg_path_get_slice(t
, slice
);
1633 int cg_pid_get_user_slice(pid_t pid
, char **slice
) {
1634 _cleanup_free_
char *cgroup
= NULL
;
1639 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1643 return cg_path_get_user_slice(cgroup
, slice
);
1646 char *cg_escape(const char *p
) {
1647 bool need_prefix
= false;
1649 /* This implements very minimal escaping for names to be used
1650 * as file names in the cgroup tree: any name which might
1651 * conflict with a kernel name or is prefixed with '_' is
1652 * prefixed with a '_'. That way, when reading cgroup names it
1653 * is sufficient to remove a single prefixing underscore if
1656 /* The return value of this function (unlike cg_unescape())
1662 streq(p
, "notify_on_release") ||
1663 streq(p
, "release_agent") ||
1664 streq(p
, "tasks") ||
1665 startswith(p
, "cgroup."))
1670 dot
= strrchr(p
, '.');
1675 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1678 n
= cgroup_controller_to_string(c
);
1683 if (memcmp(p
, n
, l
) != 0)
1693 return strappend("_", p
);
1698 char *cg_unescape(const char *p
) {
1701 /* The return value of this function (unlike cg_escape())
1702 * doesn't need free()! */
1710 #define CONTROLLER_VALID \
1714 bool cg_controller_is_valid(const char *p
) {
1720 s
= startswith(p
, "name=");
1724 if (*p
== 0 || *p
== '_')
1727 for (t
= p
; *t
; t
++)
1728 if (!strchr(CONTROLLER_VALID
, *t
))
1731 if (t
- p
> FILENAME_MAX
)
1737 int cg_slice_to_path(const char *unit
, char **ret
) {
1738 _cleanup_free_
char *p
= NULL
, *s
= NULL
, *e
= NULL
;
1745 if (streq(unit
, "-.slice")) {
1755 if (!unit_name_is_valid(unit
, UNIT_NAME_PLAIN
))
1758 if (!endswith(unit
, ".slice"))
1761 r
= unit_name_to_prefix(unit
, &p
);
1765 dash
= strchr(p
, '-');
1767 /* Don't allow initial dashes */
1772 _cleanup_free_
char *escaped
= NULL
;
1773 char n
[dash
- p
+ sizeof(".slice")];
1775 /* Don't allow trailing or double dashes */
1776 if (dash
[1] == 0 || dash
[1] == '-')
1779 strcpy(stpncpy(n
, p
, dash
- p
), ".slice");
1780 if (!unit_name_is_valid(n
, UNIT_NAME_PLAIN
))
1783 escaped
= cg_escape(n
);
1787 if (!strextend(&s
, escaped
, "/", NULL
))
1790 dash
= strchr(dash
+1, '-');
1793 e
= cg_escape(unit
);
1797 if (!strextend(&s
, e
, NULL
))
1806 int cg_set_attribute(const char *controller
, const char *path
, const char *attribute
, const char *value
) {
1807 _cleanup_free_
char *p
= NULL
;
1810 r
= cg_get_path(controller
, path
, attribute
, &p
);
1814 return write_string_file(p
, value
, 0);
1817 int cg_get_attribute(const char *controller
, const char *path
, const char *attribute
, char **ret
) {
1818 _cleanup_free_
char *p
= NULL
;
1821 r
= cg_get_path(controller
, path
, attribute
, &p
);
1825 return read_one_line_file(p
, ret
);
1828 int cg_create_everywhere(CGroupMask supported
, CGroupMask mask
, const char *path
) {
1832 /* This one will create a cgroup in our private tree, but also
1833 * duplicate it in the trees specified in mask, and remove it
1836 /* First create the cgroup in our own hierarchy. */
1837 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER
, path
);
1841 /* If we are in the unified hierarchy, we are done now */
1842 unified
= cg_unified();
1848 /* Otherwise, do the same in the other hierarchies */
1849 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1850 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1853 n
= cgroup_controller_to_string(c
);
1856 (void) cg_create(n
, path
);
1857 else if (supported
& bit
)
1858 (void) cg_trim(n
, path
, true);
1864 int cg_attach_everywhere(CGroupMask supported
, const char *path
, pid_t pid
, cg_migrate_callback_t path_callback
, void *userdata
) {
1868 r
= cg_attach(SYSTEMD_CGROUP_CONTROLLER
, path
, pid
);
1872 unified
= cg_unified();
1878 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1879 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1880 const char *p
= NULL
;
1882 if (!(supported
& bit
))
1886 p
= path_callback(bit
, userdata
);
1891 (void) cg_attach_fallback(cgroup_controller_to_string(c
), p
, pid
);
1897 int cg_attach_many_everywhere(CGroupMask supported
, const char *path
, Set
* pids
, cg_migrate_callback_t path_callback
, void *userdata
) {
1902 SET_FOREACH(pidp
, pids
, i
) {
1903 pid_t pid
= PTR_TO_PID(pidp
);
1906 q
= cg_attach_everywhere(supported
, path
, pid
, path_callback
, userdata
);
1907 if (q
< 0 && r
>= 0)
1914 int cg_migrate_everywhere(CGroupMask supported
, const char *from
, const char *to
, cg_migrate_callback_t to_callback
, void *userdata
) {
1918 if (!path_equal(from
, to
)) {
1919 r
= cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER
, from
, SYSTEMD_CGROUP_CONTROLLER
, to
, false, true);
1924 unified
= cg_unified();
1930 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1931 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1932 const char *p
= NULL
;
1934 if (!(supported
& bit
))
1938 p
= to_callback(bit
, userdata
);
1943 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER
, to
, cgroup_controller_to_string(c
), p
, false, false);
1949 int cg_trim_everywhere(CGroupMask supported
, const char *path
, bool delete_root
) {
1953 r
= cg_trim(SYSTEMD_CGROUP_CONTROLLER
, path
, delete_root
);
1957 unified
= cg_unified();
1963 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1964 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1966 if (!(supported
& bit
))
1969 (void) cg_trim(cgroup_controller_to_string(c
), path
, delete_root
);
1975 int cg_mask_supported(CGroupMask
*ret
) {
1976 CGroupMask mask
= 0;
1979 /* Determines the mask of supported cgroup controllers. Only
1980 * includes controllers we can make sense of and that are
1981 * actually accessible. */
1983 unified
= cg_unified();
1987 _cleanup_free_
char *root
= NULL
, *controllers
= NULL
, *path
= NULL
;
1990 /* In the unified hierarchy we can read the supported
1991 * and accessible controllers from a the top-level
1992 * cgroup attribute */
1994 r
= cg_get_root_path(&root
);
1998 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, root
, "cgroup.controllers", &path
);
2002 r
= read_one_line_file(path
, &controllers
);
2008 _cleanup_free_
char *n
= NULL
;
2011 r
= extract_first_word(&c
, &n
, NULL
, 0);
2017 v
= cgroup_controller_from_string(n
);
2021 mask
|= CGROUP_CONTROLLER_TO_MASK(v
);
2024 /* Currently, we only support the memory and pids
2025 * controller in the unified hierarchy, mask
2026 * everything else off. */
2027 mask
&= CGROUP_MASK_MEMORY
| CGROUP_MASK_PIDS
;
2032 /* In the legacy hierarchy, we check whether which
2033 * hierarchies are mounted. */
2035 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2038 n
= cgroup_controller_to_string(c
);
2039 if (controller_is_accessible(n
) >= 0)
2040 mask
|= CGROUP_CONTROLLER_TO_MASK(c
);
2048 int cg_kernel_controllers(Set
*controllers
) {
2049 _cleanup_fclose_
FILE *f
= NULL
;
2053 assert(controllers
);
2055 /* Determines the full list of kernel-known controllers. Might
2056 * include controllers we don't actually support, arbitrary
2057 * named hierarchies and controllers that aren't currently
2058 * accessible (because not mounted). */
2060 f
= fopen("/proc/cgroups", "re");
2062 if (errno
== ENOENT
)
2067 /* Ignore the header line */
2068 (void) fgets(buf
, sizeof(buf
), f
);
2075 if (fscanf(f
, "%ms %*i %*i %i", &controller
, &enabled
) != 2) {
2080 if (ferror(f
) && errno
!= 0)
2091 if (!cg_controller_is_valid(controller
)) {
2096 r
= set_consume(controllers
, controller
);
2104 static thread_local
int unified_cache
= -1;
2106 int cg_unified(void) {
2109 /* Checks if we support the unified hierarchy. Returns an
2110 * error when the cgroup hierarchies aren't mounted yet or we
2111 * have any other trouble determining if the unified hierarchy
2114 if (unified_cache
>= 0)
2115 return unified_cache
;
2117 if (statfs("/sys/fs/cgroup/", &fs
) < 0)
2120 if (F_TYPE_EQUAL(fs
.f_type
, CGROUP_SUPER_MAGIC
))
2121 unified_cache
= true;
2122 else if (F_TYPE_EQUAL(fs
.f_type
, TMPFS_MAGIC
))
2123 unified_cache
= false;
2127 return unified_cache
;
2130 void cg_unified_flush(void) {
2134 int cg_enable_everywhere(CGroupMask supported
, CGroupMask mask
, const char *p
) {
2135 _cleanup_free_
char *fs
= NULL
;
2144 unified
= cg_unified();
2147 if (!unified
) /* on the legacy hiearchy there's no joining of controllers defined */
2150 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, p
, "cgroup.subtree_control", &fs
);
2154 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2155 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2158 if (!(supported
& bit
))
2161 n
= cgroup_controller_to_string(c
);
2163 char s
[1 + strlen(n
) + 1];
2165 s
[0] = mask
& bit
? '+' : '-';
2168 r
= write_string_file(fs
, s
, 0);
2170 log_debug_errno(r
, "Failed to enable controller %s for %s (%s): %m", n
, p
, fs
);
2177 bool cg_is_unified_wanted(void) {
2178 static thread_local
int wanted
= -1;
2181 /* If the hierarchy is already mounted, then follow whatever
2182 * was chosen for it. */
2183 unified
= cg_unified();
2187 /* Otherwise, let's see what the kernel command line has to
2188 * say. Since checking that is expensive, let's cache the
2193 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL
);
2195 return (wanted
= true);
2197 _cleanup_free_
char *value
= NULL
;
2199 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value
);
2203 return (wanted
= false);
2205 return (wanted
= parse_boolean(value
) > 0);
2209 bool cg_is_legacy_wanted(void) {
2210 return !cg_is_unified_wanted();
2213 int cg_cpu_shares_parse(const char *s
, uint64_t *ret
) {
2218 *ret
= CGROUP_CPU_SHARES_INVALID
;
2222 r
= safe_atou64(s
, &u
);
2226 if (u
< CGROUP_CPU_SHARES_MIN
|| u
> CGROUP_CPU_SHARES_MAX
)
2233 int cg_blkio_weight_parse(const char *s
, uint64_t *ret
) {
2238 *ret
= CGROUP_BLKIO_WEIGHT_INVALID
;
2242 r
= safe_atou64(s
, &u
);
2246 if (u
< CGROUP_BLKIO_WEIGHT_MIN
|| u
> CGROUP_BLKIO_WEIGHT_MAX
)
2253 static const char *cgroup_controller_table
[_CGROUP_CONTROLLER_MAX
] = {
2254 [CGROUP_CONTROLLER_CPU
] = "cpu",
2255 [CGROUP_CONTROLLER_CPUACCT
] = "cpuacct",
2256 [CGROUP_CONTROLLER_BLKIO
] = "blkio",
2257 [CGROUP_CONTROLLER_MEMORY
] = "memory",
2258 [CGROUP_CONTROLLER_DEVICES
] = "devices",
2259 [CGROUP_CONTROLLER_PIDS
] = "pids",
2260 [CGROUP_CONTROLLER_NET_CLS
] = "net_cls",
2263 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller
, CGroupController
);