1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
35 #include "formats-util.h"
36 #include "process-util.h"
37 #include "path-util.h"
38 #include "unit-name.h"
42 #include "login-util.h"
43 #include "cgroup-util.h"
45 int cg_enumerate_processes(const char *controller
, const char *path
, FILE **_f
) {
46 _cleanup_free_
char *fs
= NULL
;
52 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
64 int cg_read_pid(FILE *f
, pid_t
*_pid
) {
67 /* Note that the cgroup.procs might contain duplicates! See
68 * cgroups.txt for details. */
74 if (fscanf(f
, "%lu", &ul
) != 1) {
79 return errno
? -errno
: -EIO
;
89 int cg_enumerate_subgroups(const char *controller
, const char *path
, DIR **_d
) {
90 _cleanup_free_
char *fs
= NULL
;
96 /* This is not recursive! */
98 r
= cg_get_path(controller
, path
, NULL
, &fs
);
110 int cg_read_subgroup(DIR *d
, char **fn
) {
116 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
119 if (de
->d_type
!= DT_DIR
)
122 if (streq(de
->d_name
, ".") ||
123 streq(de
->d_name
, ".."))
126 b
= strdup(de
->d_name
);
137 int cg_rmdir(const char *controller
, const char *path
) {
138 _cleanup_free_
char *p
= NULL
;
141 r
= cg_get_path(controller
, path
, NULL
, &p
);
146 if (r
< 0 && errno
!= ENOENT
)
152 int cg_kill(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, Set
*s
) {
153 _cleanup_set_free_ Set
*allocated_set
= NULL
;
160 /* This goes through the tasks list and kills them all. This
161 * is repeated until no further processes are added to the
162 * tasks list, to properly handle forking processes */
165 s
= allocated_set
= set_new(NULL
);
173 _cleanup_fclose_
FILE *f
= NULL
;
177 r
= cg_enumerate_processes(controller
, path
, &f
);
179 if (ret
>= 0 && r
!= -ENOENT
)
185 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
187 if (ignore_self
&& pid
== my_pid
)
190 if (set_get(s
, LONG_TO_PTR(pid
)) == LONG_TO_PTR(pid
))
193 /* If we haven't killed this process yet, kill
195 if (kill(pid
, sig
) < 0) {
196 if (ret
>= 0 && errno
!= ESRCH
)
199 if (sigcont
&& sig
!= SIGKILL
)
200 (void) kill(pid
, SIGCONT
);
208 r
= set_put(s
, LONG_TO_PTR(pid
));
224 /* To avoid racing against processes which fork
225 * quicker than we can kill them we repeat this until
226 * no new pids need to be killed. */
233 int cg_kill_recursive(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, bool rem
, Set
*s
) {
234 _cleanup_set_free_ Set
*allocated_set
= NULL
;
235 _cleanup_closedir_
DIR *d
= NULL
;
243 s
= allocated_set
= set_new(NULL
);
248 ret
= cg_kill(controller
, path
, sig
, sigcont
, ignore_self
, s
);
250 r
= cg_enumerate_subgroups(controller
, path
, &d
);
252 if (ret
>= 0 && r
!= -ENOENT
)
258 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
259 _cleanup_free_
char *p
= NULL
;
261 p
= strjoin(path
, "/", fn
, NULL
);
266 r
= cg_kill_recursive(controller
, p
, sig
, sigcont
, ignore_self
, rem
, s
);
267 if (r
!= 0 && ret
>= 0)
271 if (ret
>= 0 && r
< 0)
275 r
= cg_rmdir(controller
, path
);
276 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
283 int cg_migrate(const char *cfrom
, const char *pfrom
, const char *cto
, const char *pto
, bool ignore_self
) {
285 _cleanup_set_free_ Set
*s
= NULL
;
301 _cleanup_fclose_
FILE *f
= NULL
;
305 r
= cg_enumerate_processes(cfrom
, pfrom
, &f
);
307 if (ret
>= 0 && r
!= -ENOENT
)
313 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
315 /* This might do weird stuff if we aren't a
316 * single-threaded program. However, we
317 * luckily know we are single-threaded */
318 if (ignore_self
&& pid
== my_pid
)
321 if (set_get(s
, LONG_TO_PTR(pid
)) == LONG_TO_PTR(pid
))
324 /* Ignore kernel threads. Since they can only
325 * exist in the root cgroup, we only check for
328 (isempty(pfrom
) || path_equal(pfrom
, "/")) &&
329 is_kernel_thread(pid
) > 0)
332 r
= cg_attach(cto
, pto
, pid
);
334 if (ret
>= 0 && r
!= -ESRCH
)
341 r
= set_put(s
, LONG_TO_PTR(pid
));
361 int cg_migrate_recursive(
369 _cleanup_closedir_
DIR *d
= NULL
;
378 ret
= cg_migrate(cfrom
, pfrom
, cto
, pto
, ignore_self
);
380 r
= cg_enumerate_subgroups(cfrom
, pfrom
, &d
);
382 if (ret
>= 0 && r
!= -ENOENT
)
388 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
389 _cleanup_free_
char *p
= NULL
;
391 p
= strjoin(pfrom
, "/", fn
, NULL
);
396 r
= cg_migrate_recursive(cfrom
, p
, cto
, pto
, ignore_self
, rem
);
397 if (r
!= 0 && ret
>= 0)
401 if (r
< 0 && ret
>= 0)
405 r
= cg_rmdir(cfrom
, pfrom
);
406 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
413 int cg_migrate_recursive_fallback(
428 r
= cg_migrate_recursive(cfrom
, pfrom
, cto
, pto
, ignore_self
, rem
);
430 char prefix
[strlen(pto
) + 1];
432 /* This didn't work? Then let's try all prefixes of the destination */
434 PATH_FOREACH_PREFIX(prefix
, pto
) {
437 q
= cg_migrate_recursive(cfrom
, pfrom
, cto
, prefix
, ignore_self
, rem
);
446 static const char *controller_to_dirname(const char *controller
) {
451 /* Converts a controller name to the directory name below
452 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
453 * just cuts off the name= prefixed used for named
454 * hierarchies, if it is specified. */
456 e
= startswith(controller
, "name=");
463 static int join_path_legacy(const char *controller_dn
, const char *path
, const char *suffix
, char **fs
) {
467 assert(controller_dn
);
469 if (isempty(path
) && isempty(suffix
))
470 t
= strappend("/sys/fs/cgroup/", controller_dn
);
471 else if (isempty(path
))
472 t
= strjoin("/sys/fs/cgroup/", controller_dn
, "/", suffix
, NULL
);
473 else if (isempty(suffix
))
474 t
= strjoin("/sys/fs/cgroup/", controller_dn
, "/", path
, NULL
);
476 t
= strjoin("/sys/fs/cgroup/", controller_dn
, "/", path
, "/", suffix
, NULL
);
484 static int join_path_unified(const char *path
, const char *suffix
, char **fs
) {
489 if (isempty(path
) && isempty(suffix
))
490 t
= strdup("/sys/fs/cgroup");
491 else if (isempty(path
))
492 t
= strappend("/sys/fs/cgroup/", suffix
);
493 else if (isempty(suffix
))
494 t
= strappend("/sys/fs/cgroup/", path
);
496 t
= strjoin("/sys/fs/cgroup/", path
, "/", suffix
, NULL
);
504 int cg_get_path(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
512 /* If no controller is specified, we assume only the
513 * path below the controller matters */
515 if (!path
&& !suffix
)
520 else if (isempty(path
))
523 t
= strjoin(path
, "/", suffix
, NULL
);
527 *fs
= path_kill_slashes(t
);
531 if (!cg_controller_is_valid(controller
))
534 unified
= cg_unified();
539 r
= join_path_unified(path
, suffix
, fs
);
544 dn
= controller_to_dirname(controller
);
548 r
= join_path_legacy(dn
, path
, suffix
, fs
);
554 path_kill_slashes(*fs
);
558 static int controller_is_accessible(const char *controller
) {
563 /* Checks whether a specific controller is accessible,
564 * i.e. its hierarchy mounted. In the unified hierarchy all
565 * controllers are considered accessible, except for the named
568 if (!cg_controller_is_valid(controller
))
571 unified
= cg_unified();
575 /* We don't support named hierarchies if we are using
576 * the unified hierarchy. */
578 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
))
581 if (startswith(controller
, "name="))
587 dn
= controller_to_dirname(controller
);
588 cc
= strjoina("/sys/fs/cgroup/", dn
);
590 if (laccess(cc
, F_OK
) < 0)
597 int cg_get_path_and_check(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
603 /* Check if the specified controller is actually accessible */
604 r
= controller_is_accessible(controller
);
608 return cg_get_path(controller
, path
, suffix
, fs
);
611 static int trim_cb(const char *path
, const struct stat
*sb
, int typeflag
, struct FTW
*ftwbuf
) {
616 if (typeflag
!= FTW_DP
)
619 if (ftwbuf
->level
< 1)
626 int cg_trim(const char *controller
, const char *path
, bool delete_root
) {
627 _cleanup_free_
char *fs
= NULL
;
632 r
= cg_get_path(controller
, path
, NULL
, &fs
);
637 if (nftw(fs
, trim_cb
, 64, FTW_DEPTH
|FTW_MOUNT
|FTW_PHYS
) != 0) {
647 if (rmdir(fs
) < 0 && errno
!= ENOENT
)
654 int cg_create(const char *controller
, const char *path
) {
655 _cleanup_free_
char *fs
= NULL
;
658 r
= cg_get_path_and_check(controller
, path
, NULL
, &fs
);
662 r
= mkdir_parents(fs
, 0755);
666 if (mkdir(fs
, 0755) < 0) {
677 int cg_create_and_attach(const char *controller
, const char *path
, pid_t pid
) {
682 r
= cg_create(controller
, path
);
686 q
= cg_attach(controller
, path
, pid
);
690 /* This does not remove the cgroup on failure */
694 int cg_attach(const char *controller
, const char *path
, pid_t pid
) {
695 _cleanup_free_
char *fs
= NULL
;
696 char c
[DECIMAL_STR_MAX(pid_t
) + 2];
702 r
= cg_get_path_and_check(controller
, path
, "cgroup.procs", &fs
);
709 snprintf(c
, sizeof(c
), PID_FMT
"\n", pid
);
711 return write_string_file(fs
, c
, 0);
714 int cg_attach_fallback(const char *controller
, const char *path
, pid_t pid
) {
721 r
= cg_attach(controller
, path
, pid
);
723 char prefix
[strlen(path
) + 1];
725 /* This didn't work? Then let's try all prefixes of
728 PATH_FOREACH_PREFIX(prefix
, path
) {
731 q
= cg_attach(controller
, prefix
, pid
);
740 int cg_set_group_access(
741 const char *controller
,
747 _cleanup_free_
char *fs
= NULL
;
750 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
753 if (mode
!= MODE_INVALID
)
756 r
= cg_get_path(controller
, path
, NULL
, &fs
);
760 return chmod_and_chown(fs
, mode
, uid
, gid
);
763 int cg_set_task_access(
764 const char *controller
,
770 _cleanup_free_
char *fs
= NULL
, *procs
= NULL
;
775 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
778 if (mode
!= MODE_INVALID
)
781 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
785 r
= chmod_and_chown(fs
, mode
, uid
, gid
);
789 unified
= cg_unified();
795 /* Compatibility: always keep values for "tasks" in sync with
797 if (cg_get_path(controller
, path
, "tasks", &procs
) >= 0)
798 (void) chmod_and_chown(procs
, mode
, uid
, gid
);
803 int cg_pid_get_path(const char *controller
, pid_t pid
, char **path
) {
804 _cleanup_fclose_
FILE *f
= NULL
;
813 unified
= cg_unified();
818 if (!cg_controller_is_valid(controller
))
821 controller
= SYSTEMD_CGROUP_CONTROLLER
;
823 cs
= strlen(controller
);
826 fs
= procfs_file_alloca(pid
, "cgroup");
829 return errno
== ENOENT
? -ESRCH
: -errno
;
831 FOREACH_LINE(line
, f
, return -errno
) {
837 e
= startswith(line
, "0:");
847 const char *word
, *state
;
850 l
= strchr(line
, ':');
860 FOREACH_WORD_SEPARATOR(word
, k
, l
, ",", state
) {
861 if (k
== cs
&& memcmp(word
, controller
, cs
) == 0) {
882 int cg_install_release_agent(const char *controller
, const char *agent
) {
883 _cleanup_free_
char *fs
= NULL
, *contents
= NULL
;
889 unified
= cg_unified();
892 if (unified
) /* doesn't apply to unified hierarchy */
895 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
899 r
= read_one_line_file(fs
, &contents
);
903 sc
= strstrip(contents
);
905 r
= write_string_file(fs
, agent
, 0);
908 } else if (!streq(sc
, agent
))
912 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
916 contents
= mfree(contents
);
917 r
= read_one_line_file(fs
, &contents
);
921 sc
= strstrip(contents
);
922 if (streq(sc
, "0")) {
923 r
= write_string_file(fs
, "1", 0);
936 int cg_uninstall_release_agent(const char *controller
) {
937 _cleanup_free_
char *fs
= NULL
;
940 unified
= cg_unified();
943 if (unified
) /* Doesn't apply to unified hierarchy */
946 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
950 r
= write_string_file(fs
, "0", 0);
956 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
960 r
= write_string_file(fs
, "", 0);
967 int cg_is_empty(const char *controller
, const char *path
) {
968 _cleanup_fclose_
FILE *f
= NULL
;
974 r
= cg_enumerate_processes(controller
, path
, &f
);
980 r
= cg_read_pid(f
, &pid
);
987 int cg_is_empty_recursive(const char *controller
, const char *path
) {
992 /* The root cgroup is always populated */
993 if (controller
&& (isempty(path
) || path_equal(path
, "/")))
996 unified
= cg_unified();
1001 _cleanup_free_
char *populated
= NULL
, *t
= NULL
;
1003 /* On the unified hierarchy we can check empty state
1004 * via the "cgroup.populated" attribute. */
1006 r
= cg_get_path(controller
, path
, "cgroup.populated", &populated
);
1010 r
= read_one_line_file(populated
, &t
);
1014 return streq(t
, "0");
1016 _cleanup_closedir_
DIR *d
= NULL
;
1019 r
= cg_is_empty(controller
, path
);
1023 r
= cg_enumerate_subgroups(controller
, path
, &d
);
1029 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
1030 _cleanup_free_
char *p
= NULL
;
1032 p
= strjoin(path
, "/", fn
, NULL
);
1037 r
= cg_is_empty_recursive(controller
, p
);
1048 int cg_split_spec(const char *spec
, char **controller
, char **path
) {
1049 char *t
= NULL
, *u
= NULL
;
1055 if (!path_is_safe(spec
))
1063 *path
= path_kill_slashes(t
);
1072 e
= strchr(spec
, ':');
1074 if (!cg_controller_is_valid(spec
))
1091 t
= strndup(spec
, e
-spec
);
1094 if (!cg_controller_is_valid(t
)) {
1108 if (!path_is_safe(u
) ||
1109 !path_is_absolute(u
)) {
1115 path_kill_slashes(u
);
1131 int cg_mangle_path(const char *path
, char **result
) {
1132 _cleanup_free_
char *c
= NULL
, *p
= NULL
;
1139 /* First, check if it already is a filesystem path */
1140 if (path_startswith(path
, "/sys/fs/cgroup")) {
1146 *result
= path_kill_slashes(t
);
1150 /* Otherwise, treat it as cg spec */
1151 r
= cg_split_spec(path
, &c
, &p
);
1155 return cg_get_path(c
?: SYSTEMD_CGROUP_CONTROLLER
, p
?: "/", NULL
, result
);
1158 int cg_get_root_path(char **path
) {
1164 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 1, &p
);
1168 e
= endswith(p
, "/" SPECIAL_INIT_SCOPE
);
1170 e
= endswith(p
, "/" SPECIAL_SYSTEM_SLICE
); /* legacy */
1172 e
= endswith(p
, "/system"); /* even more legacy */
1180 int cg_shift_path(const char *cgroup
, const char *root
, const char **shifted
) {
1181 _cleanup_free_
char *rt
= NULL
;
1189 /* If the root was specified let's use that, otherwise
1190 * let's determine it from PID 1 */
1192 r
= cg_get_root_path(&rt
);
1199 p
= path_startswith(cgroup
, root
);
1200 if (p
&& p
> cgroup
)
1208 int cg_pid_get_path_shifted(pid_t pid
, const char *root
, char **cgroup
) {
1209 _cleanup_free_
char *raw
= NULL
;
1216 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &raw
);
1220 r
= cg_shift_path(raw
, root
, &c
);
1240 int cg_path_decode_unit(const char *cgroup
, char **unit
){
1247 n
= strcspn(cgroup
, "/");
1251 c
= strndupa(cgroup
, n
);
1254 if (!unit_name_is_valid(c
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
1265 static bool valid_slice_name(const char *p
, size_t n
) {
1270 if (n
< strlen("x.slice"))
1273 if (memcmp(p
+ n
- 6, ".slice", 6) == 0) {
1279 c
= cg_unescape(buf
);
1281 return unit_name_is_valid(c
, UNIT_NAME_PLAIN
);
1287 static const char *skip_slices(const char *p
) {
1290 /* Skips over all slice assignments */
1295 p
+= strspn(p
, "/");
1297 n
= strcspn(p
, "/");
1298 if (!valid_slice_name(p
, n
))
1305 int cg_path_get_unit(const char *path
, char **ret
) {
1313 e
= skip_slices(path
);
1315 r
= cg_path_decode_unit(e
, &unit
);
1319 /* We skipped over the slices, don't accept any now */
1320 if (endswith(unit
, ".slice")) {
1329 int cg_pid_get_unit(pid_t pid
, char **unit
) {
1330 _cleanup_free_
char *cgroup
= NULL
;
1335 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1339 return cg_path_get_unit(cgroup
, unit
);
1343 * Skip session-*.scope, but require it to be there.
1345 static const char *skip_session(const char *p
) {
1351 p
+= strspn(p
, "/");
1353 n
= strcspn(p
, "/");
1354 if (n
< strlen("session-x.scope"))
1357 if (memcmp(p
, "session-", 8) == 0 && memcmp(p
+ n
- 6, ".scope", 6) == 0) {
1358 char buf
[n
- 8 - 6 + 1];
1360 memcpy(buf
, p
+ 8, n
- 8 - 6);
1363 /* Note that session scopes never need unescaping,
1364 * since they cannot conflict with the kernel's own
1365 * names, hence we don't need to call cg_unescape()
1368 if (!session_id_valid(buf
))
1372 p
+= strspn(p
, "/");
1380 * Skip user@*.service, but require it to be there.
1382 static const char *skip_user_manager(const char *p
) {
1388 p
+= strspn(p
, "/");
1390 n
= strcspn(p
, "/");
1391 if (n
< strlen("user@x.service"))
1394 if (memcmp(p
, "user@", 5) == 0 && memcmp(p
+ n
- 8, ".service", 8) == 0) {
1395 char buf
[n
- 5 - 8 + 1];
1397 memcpy(buf
, p
+ 5, n
- 5 - 8);
1400 /* Note that user manager services never need unescaping,
1401 * since they cannot conflict with the kernel's own
1402 * names, hence we don't need to call cg_unescape()
1405 if (parse_uid(buf
, NULL
) < 0)
1409 p
+= strspn(p
, "/");
1417 static const char *skip_user_prefix(const char *path
) {
1422 /* Skip slices, if there are any */
1423 e
= skip_slices(path
);
1425 /* Skip the user manager, if it's in the path now... */
1426 t
= skip_user_manager(e
);
1430 /* Alternatively skip the user session if it is in the path... */
1431 return skip_session(e
);
1434 int cg_path_get_user_unit(const char *path
, char **ret
) {
1440 t
= skip_user_prefix(path
);
1444 /* And from here on it looks pretty much the same as for a
1445 * system unit, hence let's use the same parser from here
1447 return cg_path_get_unit(t
, ret
);
1450 int cg_pid_get_user_unit(pid_t pid
, char **unit
) {
1451 _cleanup_free_
char *cgroup
= NULL
;
1456 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1460 return cg_path_get_user_unit(cgroup
, unit
);
1463 int cg_path_get_machine_name(const char *path
, char **machine
) {
1464 _cleanup_free_
char *u
= NULL
;
1468 r
= cg_path_get_unit(path
, &u
);
1472 sl
= strjoina("/run/systemd/machines/unit:", u
);
1473 return readlink_malloc(sl
, machine
);
1476 int cg_pid_get_machine_name(pid_t pid
, char **machine
) {
1477 _cleanup_free_
char *cgroup
= NULL
;
1482 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1486 return cg_path_get_machine_name(cgroup
, machine
);
1489 int cg_path_get_session(const char *path
, char **session
) {
1490 _cleanup_free_
char *unit
= NULL
;
1496 r
= cg_path_get_unit(path
, &unit
);
1500 start
= startswith(unit
, "session-");
1503 end
= endswith(start
, ".scope");
1508 if (!session_id_valid(start
))
1524 int cg_pid_get_session(pid_t pid
, char **session
) {
1525 _cleanup_free_
char *cgroup
= NULL
;
1528 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1532 return cg_path_get_session(cgroup
, session
);
1535 int cg_path_get_owner_uid(const char *path
, uid_t
*uid
) {
1536 _cleanup_free_
char *slice
= NULL
;
1542 r
= cg_path_get_slice(path
, &slice
);
1546 start
= startswith(slice
, "user-");
1549 end
= endswith(start
, ".slice");
1554 if (parse_uid(start
, uid
) < 0)
1560 int cg_pid_get_owner_uid(pid_t pid
, uid_t
*uid
) {
1561 _cleanup_free_
char *cgroup
= NULL
;
1564 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1568 return cg_path_get_owner_uid(cgroup
, uid
);
1571 int cg_path_get_slice(const char *p
, char **slice
) {
1572 const char *e
= NULL
;
1577 /* Finds the right-most slice unit from the beginning, but
1578 * stops before we come to the first non-slice unit. */
1583 p
+= strspn(p
, "/");
1585 n
= strcspn(p
, "/");
1586 if (!valid_slice_name(p
, n
)) {
1591 s
= strdup("-.slice");
1599 return cg_path_decode_unit(e
, slice
);
1607 int cg_pid_get_slice(pid_t pid
, char **slice
) {
1608 _cleanup_free_
char *cgroup
= NULL
;
1613 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1617 return cg_path_get_slice(cgroup
, slice
);
1620 int cg_path_get_user_slice(const char *p
, char **slice
) {
1625 t
= skip_user_prefix(p
);
1629 /* And now it looks pretty much the same as for a system
1630 * slice, so let's just use the same parser from here on. */
1631 return cg_path_get_slice(t
, slice
);
1634 int cg_pid_get_user_slice(pid_t pid
, char **slice
) {
1635 _cleanup_free_
char *cgroup
= NULL
;
1640 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1644 return cg_path_get_user_slice(cgroup
, slice
);
1647 char *cg_escape(const char *p
) {
1648 bool need_prefix
= false;
1650 /* This implements very minimal escaping for names to be used
1651 * as file names in the cgroup tree: any name which might
1652 * conflict with a kernel name or is prefixed with '_' is
1653 * prefixed with a '_'. That way, when reading cgroup names it
1654 * is sufficient to remove a single prefixing underscore if
1657 /* The return value of this function (unlike cg_unescape())
1663 streq(p
, "notify_on_release") ||
1664 streq(p
, "release_agent") ||
1665 streq(p
, "tasks") ||
1666 startswith(p
, "cgroup."))
1671 dot
= strrchr(p
, '.');
1676 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1679 n
= cgroup_controller_to_string(c
);
1684 if (memcmp(p
, n
, l
) != 0)
1694 return strappend("_", p
);
1699 char *cg_unescape(const char *p
) {
1702 /* The return value of this function (unlike cg_escape())
1703 * doesn't need free()! */
1711 #define CONTROLLER_VALID \
1715 bool cg_controller_is_valid(const char *p
) {
1721 s
= startswith(p
, "name=");
1725 if (*p
== 0 || *p
== '_')
1728 for (t
= p
; *t
; t
++)
1729 if (!strchr(CONTROLLER_VALID
, *t
))
1732 if (t
- p
> FILENAME_MAX
)
1738 int cg_slice_to_path(const char *unit
, char **ret
) {
1739 _cleanup_free_
char *p
= NULL
, *s
= NULL
, *e
= NULL
;
1746 if (streq(unit
, "-.slice")) {
1756 if (!unit_name_is_valid(unit
, UNIT_NAME_PLAIN
))
1759 if (!endswith(unit
, ".slice"))
1762 r
= unit_name_to_prefix(unit
, &p
);
1766 dash
= strchr(p
, '-');
1768 /* Don't allow initial dashes */
1773 _cleanup_free_
char *escaped
= NULL
;
1774 char n
[dash
- p
+ sizeof(".slice")];
1776 /* Don't allow trailing or double dashes */
1777 if (dash
[1] == 0 || dash
[1] == '-')
1780 strcpy(stpncpy(n
, p
, dash
- p
), ".slice");
1781 if (!unit_name_is_valid(n
, UNIT_NAME_PLAIN
))
1784 escaped
= cg_escape(n
);
1788 if (!strextend(&s
, escaped
, "/", NULL
))
1791 dash
= strchr(dash
+1, '-');
1794 e
= cg_escape(unit
);
1798 if (!strextend(&s
, e
, NULL
))
1807 int cg_set_attribute(const char *controller
, const char *path
, const char *attribute
, const char *value
) {
1808 _cleanup_free_
char *p
= NULL
;
1811 r
= cg_get_path(controller
, path
, attribute
, &p
);
1815 return write_string_file(p
, value
, 0);
1818 int cg_get_attribute(const char *controller
, const char *path
, const char *attribute
, char **ret
) {
1819 _cleanup_free_
char *p
= NULL
;
1822 r
= cg_get_path(controller
, path
, attribute
, &p
);
1826 return read_one_line_file(p
, ret
);
1829 int cg_create_everywhere(CGroupMask supported
, CGroupMask mask
, const char *path
) {
1833 /* This one will create a cgroup in our private tree, but also
1834 * duplicate it in the trees specified in mask, and remove it
1837 /* First create the cgroup in our own hierarchy. */
1838 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER
, path
);
1842 /* If we are in the unified hierarchy, we are done now */
1843 unified
= cg_unified();
1849 /* Otherwise, do the same in the other hierarchies */
1850 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1851 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1854 n
= cgroup_controller_to_string(c
);
1857 (void) cg_create(n
, path
);
1858 else if (supported
& bit
)
1859 (void) cg_trim(n
, path
, true);
1865 int cg_attach_everywhere(CGroupMask supported
, const char *path
, pid_t pid
, cg_migrate_callback_t path_callback
, void *userdata
) {
1869 r
= cg_attach(SYSTEMD_CGROUP_CONTROLLER
, path
, pid
);
1873 unified
= cg_unified();
1879 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1880 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1881 const char *p
= NULL
;
1883 if (!(supported
& bit
))
1887 p
= path_callback(bit
, userdata
);
1892 (void) cg_attach_fallback(cgroup_controller_to_string(c
), p
, pid
);
1898 int cg_attach_many_everywhere(CGroupMask supported
, const char *path
, Set
* pids
, cg_migrate_callback_t path_callback
, void *userdata
) {
1903 SET_FOREACH(pidp
, pids
, i
) {
1904 pid_t pid
= PTR_TO_LONG(pidp
);
1907 q
= cg_attach_everywhere(supported
, path
, pid
, path_callback
, userdata
);
1908 if (q
< 0 && r
>= 0)
1915 int cg_migrate_everywhere(CGroupMask supported
, const char *from
, const char *to
, cg_migrate_callback_t to_callback
, void *userdata
) {
1919 if (!path_equal(from
, to
)) {
1920 r
= cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER
, from
, SYSTEMD_CGROUP_CONTROLLER
, to
, false, true);
1925 unified
= cg_unified();
1931 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1932 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1933 const char *p
= NULL
;
1935 if (!(supported
& bit
))
1939 p
= to_callback(bit
, userdata
);
1944 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER
, to
, cgroup_controller_to_string(c
), p
, false, false);
1950 int cg_trim_everywhere(CGroupMask supported
, const char *path
, bool delete_root
) {
1954 r
= cg_trim(SYSTEMD_CGROUP_CONTROLLER
, path
, delete_root
);
1958 unified
= cg_unified();
1964 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1965 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1967 if (!(supported
& bit
))
1970 (void) cg_trim(cgroup_controller_to_string(c
), path
, delete_root
);
1976 int cg_mask_supported(CGroupMask
*ret
) {
1977 CGroupMask mask
= 0;
1980 /* Determines the mask of supported cgroup controllers. Only
1981 * includes controllers we can make sense of and that are
1982 * actually accessible. */
1984 unified
= cg_unified();
1988 _cleanup_free_
char *controllers
= NULL
;
1991 /* In the unified hierarchy we can read the supported
1992 * and accessible controllers from the top-level
1993 * cgroup attribute */
1995 r
= read_one_line_file("/sys/fs/cgroup/cgroup.controllers", &controllers
);
2001 _cleanup_free_
char *n
= NULL
;
2004 r
= extract_first_word(&c
, &n
, NULL
, 0);
2010 v
= cgroup_controller_from_string(n
);
2014 mask
|= CGROUP_CONTROLLER_TO_MASK(v
);
2017 /* Currently, we only support the memory controller in
2018 * the unified hierarchy, mask everything else off. */
2019 mask
&= CGROUP_MASK_MEMORY
;
2024 /* In the legacy hierarchy, we check which
2025 * hierarchies are mounted. */
2027 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2030 n
= cgroup_controller_to_string(c
);
2031 if (controller_is_accessible(n
) >= 0)
2032 mask
|= CGROUP_CONTROLLER_TO_MASK(c
);
2040 int cg_kernel_controllers(Set
*controllers
) {
2041 _cleanup_fclose_
FILE *f
= NULL
;
2045 assert(controllers
);
2047 /* Determines the full list of kernel-known controllers. Might
2048 * include controllers we don't actually support, arbitrary
2049 * named hierarchies and controllers that aren't currently
2050 * accessible (because not mounted). */
2052 f
= fopen("/proc/cgroups", "re");
2054 if (errno
== ENOENT
)
2059 /* Ignore the header line */
2060 (void) fgets(buf
, sizeof(buf
), f
);
2067 if (fscanf(f
, "%ms %*i %*i %i", &controller
, &enabled
) != 2) {
2072 if (ferror(f
) && errno
!= 0)
2083 if (!cg_controller_is_valid(controller
)) {
2088 r
= set_consume(controllers
, controller
);
2096 static thread_local
int unified_cache
= -1;
2098 int cg_unified(void) {
2101 /* Checks if we support the unified hierarchy. Returns an
2102 * error when the cgroup hierarchies aren't mounted yet or we
2103 * have any other trouble determining if the unified hierarchy
2106 if (unified_cache
>= 0)
2107 return unified_cache
;
2109 if (statfs("/sys/fs/cgroup/", &fs
) < 0)
2112 if (F_TYPE_EQUAL(fs
.f_type
, CGROUP_SUPER_MAGIC
))
2113 unified_cache
= true;
2114 else if (F_TYPE_EQUAL(fs
.f_type
, TMPFS_MAGIC
))
2115 unified_cache
= false;
2119 return unified_cache
;
2122 void cg_unified_flush(void) {
2126 int cg_enable_everywhere(CGroupMask supported
, CGroupMask mask
, const char *p
) {
2127 _cleanup_free_
char *fs
= NULL
;
2136 unified
= cg_unified();
2139 if (!unified
) /* on the legacy hierarchy there's no joining of controllers defined */
2142 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, p
, "cgroup.subtree_control", &fs
);
2146 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2147 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2150 if (!(supported
& bit
))
2153 n
= cgroup_controller_to_string(c
);
2155 char s
[1 + strlen(n
) + 1];
2157 s
[0] = mask
& bit
? '+' : '-';
2160 r
= write_string_file(fs
, s
, 0);
2162 log_warning_errno(r
, "Failed to enable controller %s for %s (%s): %m", n
, p
, fs
);
2169 bool cg_is_unified_wanted(void) {
2170 static thread_local
int wanted
= -1;
2173 /* If the hierarchy is already mounted, then follow whatever
2174 * was chosen for it. */
2175 unified
= cg_unified();
2179 /* Otherwise, let's see what the kernel command line has to
2180 * say. Since checking that is expensive, let's cache the
2185 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL
);
2187 return (wanted
= true);
2189 _cleanup_free_
char *value
= NULL
;
2191 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value
);
2195 return (wanted
= false);
2197 return (wanted
= parse_boolean(value
) > 0);
2201 bool cg_is_legacy_wanted(void) {
2202 return !cg_is_unified_wanted();
2205 static const char *cgroup_controller_table
[_CGROUP_CONTROLLER_MAX
] = {
2206 [CGROUP_CONTROLLER_CPU
] = "cpu",
2207 [CGROUP_CONTROLLER_CPUACCT
] = "cpuacct",
2208 [CGROUP_CONTROLLER_BLKIO
] = "blkio",
2209 [CGROUP_CONTROLLER_MEMORY
] = "memory",
2210 [CGROUP_CONTROLLER_DEVICE
] = "device",
2213 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller
, CGroupController
);