1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
33 #include "dirent-util.h"
34 #include "extract-word.h"
37 #include "formats-util.h"
39 #include "login-util.h"
42 #include "parse-util.h"
43 #include "path-util.h"
44 #include "process-util.h"
47 #include "string-util.h"
48 #include "unit-name.h"
49 #include "user-util.h"
52 int cg_enumerate_processes(const char *controller
, const char *path
, FILE **_f
) {
53 _cleanup_free_
char *fs
= NULL
;
59 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
71 int cg_read_pid(FILE *f
, pid_t
*_pid
) {
74 /* Note that the cgroup.procs might contain duplicates! See
75 * cgroups.txt for details. */
81 if (fscanf(f
, "%lu", &ul
) != 1) {
86 return errno
? -errno
: -EIO
;
96 int cg_enumerate_subgroups(const char *controller
, const char *path
, DIR **_d
) {
97 _cleanup_free_
char *fs
= NULL
;
103 /* This is not recursive! */
105 r
= cg_get_path(controller
, path
, NULL
, &fs
);
117 int cg_read_subgroup(DIR *d
, char **fn
) {
123 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
126 if (de
->d_type
!= DT_DIR
)
129 if (streq(de
->d_name
, ".") ||
130 streq(de
->d_name
, ".."))
133 b
= strdup(de
->d_name
);
144 int cg_rmdir(const char *controller
, const char *path
) {
145 _cleanup_free_
char *p
= NULL
;
148 r
= cg_get_path(controller
, path
, NULL
, &p
);
153 if (r
< 0 && errno
!= ENOENT
)
159 int cg_kill(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, Set
*s
) {
160 _cleanup_set_free_ Set
*allocated_set
= NULL
;
167 /* This goes through the tasks list and kills them all. This
168 * is repeated until no further processes are added to the
169 * tasks list, to properly handle forking processes */
172 s
= allocated_set
= set_new(NULL
);
180 _cleanup_fclose_
FILE *f
= NULL
;
184 r
= cg_enumerate_processes(controller
, path
, &f
);
186 if (ret
>= 0 && r
!= -ENOENT
)
192 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
194 if (ignore_self
&& pid
== my_pid
)
197 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
200 /* If we haven't killed this process yet, kill
202 if (kill(pid
, sig
) < 0) {
203 if (ret
>= 0 && errno
!= ESRCH
)
206 if (sigcont
&& sig
!= SIGKILL
)
207 (void) kill(pid
, SIGCONT
);
215 r
= set_put(s
, PID_TO_PTR(pid
));
231 /* To avoid racing against processes which fork
232 * quicker than we can kill them we repeat this until
233 * no new pids need to be killed. */
240 int cg_kill_recursive(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, bool rem
, Set
*s
) {
241 _cleanup_set_free_ Set
*allocated_set
= NULL
;
242 _cleanup_closedir_
DIR *d
= NULL
;
250 s
= allocated_set
= set_new(NULL
);
255 ret
= cg_kill(controller
, path
, sig
, sigcont
, ignore_self
, s
);
257 r
= cg_enumerate_subgroups(controller
, path
, &d
);
259 if (ret
>= 0 && r
!= -ENOENT
)
265 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
266 _cleanup_free_
char *p
= NULL
;
268 p
= strjoin(path
, "/", fn
, NULL
);
273 r
= cg_kill_recursive(controller
, p
, sig
, sigcont
, ignore_self
, rem
, s
);
274 if (r
!= 0 && ret
>= 0)
278 if (ret
>= 0 && r
< 0)
282 r
= cg_rmdir(controller
, path
);
283 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
290 int cg_migrate(const char *cfrom
, const char *pfrom
, const char *cto
, const char *pto
, bool ignore_self
) {
292 _cleanup_set_free_ Set
*s
= NULL
;
308 _cleanup_fclose_
FILE *f
= NULL
;
312 r
= cg_enumerate_processes(cfrom
, pfrom
, &f
);
314 if (ret
>= 0 && r
!= -ENOENT
)
320 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
322 /* This might do weird stuff if we aren't a
323 * single-threaded program. However, we
324 * luckily know we are not */
325 if (ignore_self
&& pid
== my_pid
)
328 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
331 /* Ignore kernel threads. Since they can only
332 * exist in the root cgroup, we only check for
335 (isempty(pfrom
) || path_equal(pfrom
, "/")) &&
336 is_kernel_thread(pid
) > 0)
339 r
= cg_attach(cto
, pto
, pid
);
341 if (ret
>= 0 && r
!= -ESRCH
)
348 r
= set_put(s
, PID_TO_PTR(pid
));
368 int cg_migrate_recursive(
376 _cleanup_closedir_
DIR *d
= NULL
;
385 ret
= cg_migrate(cfrom
, pfrom
, cto
, pto
, ignore_self
);
387 r
= cg_enumerate_subgroups(cfrom
, pfrom
, &d
);
389 if (ret
>= 0 && r
!= -ENOENT
)
395 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
396 _cleanup_free_
char *p
= NULL
;
398 p
= strjoin(pfrom
, "/", fn
, NULL
);
403 r
= cg_migrate_recursive(cfrom
, p
, cto
, pto
, ignore_self
, rem
);
404 if (r
!= 0 && ret
>= 0)
408 if (r
< 0 && ret
>= 0)
412 r
= cg_rmdir(cfrom
, pfrom
);
413 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
420 int cg_migrate_recursive_fallback(
435 r
= cg_migrate_recursive(cfrom
, pfrom
, cto
, pto
, ignore_self
, rem
);
437 char prefix
[strlen(pto
) + 1];
439 /* This didn't work? Then let's try all prefixes of the destination */
441 PATH_FOREACH_PREFIX(prefix
, pto
) {
444 q
= cg_migrate_recursive(cfrom
, pfrom
, cto
, prefix
, ignore_self
, rem
);
453 static const char *controller_to_dirname(const char *controller
) {
458 /* Converts a controller name to the directory name below
459 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
460 * just cuts off the name= prefix used for named
461 * hierarchies, if it is specified. */
463 e
= startswith(controller
, "name=");
470 static int join_path_legacy(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
477 dn
= controller_to_dirname(controller
);
479 if (isempty(path
) && isempty(suffix
))
480 t
= strappend("/sys/fs/cgroup/", dn
);
481 else if (isempty(path
))
482 t
= strjoin("/sys/fs/cgroup/", dn
, "/", suffix
, NULL
);
483 else if (isempty(suffix
))
484 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, NULL
);
486 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, "/", suffix
, NULL
);
494 static int join_path_unified(const char *path
, const char *suffix
, char **fs
) {
499 if (isempty(path
) && isempty(suffix
))
500 t
= strdup("/sys/fs/cgroup");
501 else if (isempty(path
))
502 t
= strappend("/sys/fs/cgroup/", suffix
);
503 else if (isempty(suffix
))
504 t
= strappend("/sys/fs/cgroup/", path
);
506 t
= strjoin("/sys/fs/cgroup/", path
, "/", suffix
, NULL
);
514 int cg_get_path(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
522 /* If no controller is specified, we return the path
523 * *below* the controllers, without any prefix. */
525 if (!path
&& !suffix
)
533 t
= strjoin(path
, "/", suffix
, NULL
);
537 *fs
= path_kill_slashes(t
);
541 if (!cg_controller_is_valid(controller
))
544 unified
= cg_unified();
549 r
= join_path_unified(path
, suffix
, fs
);
551 r
= join_path_legacy(controller
, path
, suffix
, fs
);
555 path_kill_slashes(*fs
);
559 static int controller_is_accessible(const char *controller
) {
564 /* Checks whether a specific controller is accessible,
565 * i.e. its hierarchy mounted. In the unified hierarchy all
566 * controllers are considered accessible, except for the named
569 if (!cg_controller_is_valid(controller
))
572 unified
= cg_unified();
576 /* We don't support named hierarchies if we are using
577 * the unified hierarchy. */
579 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
))
582 if (startswith(controller
, "name="))
588 dn
= controller_to_dirname(controller
);
589 cc
= strjoina("/sys/fs/cgroup/", dn
);
591 if (laccess(cc
, F_OK
) < 0)
598 int cg_get_path_and_check(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
604 /* Check if the specified controller is actually accessible */
605 r
= controller_is_accessible(controller
);
609 return cg_get_path(controller
, path
, suffix
, fs
);
612 static int trim_cb(const char *path
, const struct stat
*sb
, int typeflag
, struct FTW
*ftwbuf
) {
617 if (typeflag
!= FTW_DP
)
620 if (ftwbuf
->level
< 1)
627 int cg_trim(const char *controller
, const char *path
, bool delete_root
) {
628 _cleanup_free_
char *fs
= NULL
;
633 r
= cg_get_path(controller
, path
, NULL
, &fs
);
638 if (nftw(fs
, trim_cb
, 64, FTW_DEPTH
|FTW_MOUNT
|FTW_PHYS
) != 0) {
648 if (rmdir(fs
) < 0 && errno
!= ENOENT
)
655 int cg_create(const char *controller
, const char *path
) {
656 _cleanup_free_
char *fs
= NULL
;
659 r
= cg_get_path_and_check(controller
, path
, NULL
, &fs
);
663 r
= mkdir_parents(fs
, 0755);
667 if (mkdir(fs
, 0755) < 0) {
678 int cg_create_and_attach(const char *controller
, const char *path
, pid_t pid
) {
683 r
= cg_create(controller
, path
);
687 q
= cg_attach(controller
, path
, pid
);
691 /* This does not remove the cgroup on failure */
695 int cg_attach(const char *controller
, const char *path
, pid_t pid
) {
696 _cleanup_free_
char *fs
= NULL
;
697 char c
[DECIMAL_STR_MAX(pid_t
) + 2];
703 r
= cg_get_path_and_check(controller
, path
, "cgroup.procs", &fs
);
710 snprintf(c
, sizeof(c
), PID_FMT
"\n", pid
);
712 return write_string_file(fs
, c
, 0);
715 int cg_attach_fallback(const char *controller
, const char *path
, pid_t pid
) {
722 r
= cg_attach(controller
, path
, pid
);
724 char prefix
[strlen(path
) + 1];
726 /* This didn't work? Then let's try all prefixes of
729 PATH_FOREACH_PREFIX(prefix
, path
) {
732 q
= cg_attach(controller
, prefix
, pid
);
741 int cg_set_group_access(
742 const char *controller
,
748 _cleanup_free_
char *fs
= NULL
;
751 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
754 if (mode
!= MODE_INVALID
)
757 r
= cg_get_path(controller
, path
, NULL
, &fs
);
761 return chmod_and_chown(fs
, mode
, uid
, gid
);
764 int cg_set_task_access(
765 const char *controller
,
771 _cleanup_free_
char *fs
= NULL
, *procs
= NULL
;
776 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
779 if (mode
!= MODE_INVALID
)
782 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
786 r
= chmod_and_chown(fs
, mode
, uid
, gid
);
790 unified
= cg_unified();
796 /* Compatibility: always keep values for "tasks" in sync with
798 if (cg_get_path(controller
, path
, "tasks", &procs
) >= 0)
799 (void) chmod_and_chown(procs
, mode
, uid
, gid
);
804 int cg_pid_get_path(const char *controller
, pid_t pid
, char **path
) {
805 _cleanup_fclose_
FILE *f
= NULL
;
814 unified
= cg_unified();
819 if (!cg_controller_is_valid(controller
))
822 controller
= SYSTEMD_CGROUP_CONTROLLER
;
824 cs
= strlen(controller
);
827 fs
= procfs_file_alloca(pid
, "cgroup");
830 return errno
== ENOENT
? -ESRCH
: -errno
;
832 FOREACH_LINE(line
, f
, return -errno
) {
838 e
= startswith(line
, "0:");
848 const char *word
, *state
;
851 l
= strchr(line
, ':');
861 FOREACH_WORD_SEPARATOR(word
, k
, l
, ",", state
) {
862 if (k
== cs
&& memcmp(word
, controller
, cs
) == 0) {
883 int cg_install_release_agent(const char *controller
, const char *agent
) {
884 _cleanup_free_
char *fs
= NULL
, *contents
= NULL
;
890 unified
= cg_unified();
893 if (unified
) /* doesn't apply to unified hierarchy */
896 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
900 r
= read_one_line_file(fs
, &contents
);
904 sc
= strstrip(contents
);
906 r
= write_string_file(fs
, agent
, 0);
909 } else if (!path_equal(sc
, agent
))
913 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
917 contents
= mfree(contents
);
918 r
= read_one_line_file(fs
, &contents
);
922 sc
= strstrip(contents
);
923 if (streq(sc
, "0")) {
924 r
= write_string_file(fs
, "1", 0);
937 int cg_uninstall_release_agent(const char *controller
) {
938 _cleanup_free_
char *fs
= NULL
;
941 unified
= cg_unified();
944 if (unified
) /* Doesn't apply to unified hierarchy */
947 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
951 r
= write_string_file(fs
, "0", 0);
957 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
961 r
= write_string_file(fs
, "", 0);
968 int cg_is_empty(const char *controller
, const char *path
) {
969 _cleanup_fclose_
FILE *f
= NULL
;
975 r
= cg_enumerate_processes(controller
, path
, &f
);
981 r
= cg_read_pid(f
, &pid
);
988 int cg_is_empty_recursive(const char *controller
, const char *path
) {
993 /* The root cgroup is always populated */
994 if (controller
&& (isempty(path
) || path_equal(path
, "/")))
997 unified
= cg_unified();
1002 _cleanup_free_
char *populated
= NULL
, *t
= NULL
;
1004 /* On the unified hierarchy we can check empty state
1005 * via the "cgroup.populated" attribute. */
1007 r
= cg_get_path(controller
, path
, "cgroup.populated", &populated
);
1011 r
= read_one_line_file(populated
, &t
);
1017 return streq(t
, "0");
1019 _cleanup_closedir_
DIR *d
= NULL
;
1022 r
= cg_is_empty(controller
, path
);
1026 r
= cg_enumerate_subgroups(controller
, path
, &d
);
1032 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
1033 _cleanup_free_
char *p
= NULL
;
1035 p
= strjoin(path
, "/", fn
, NULL
);
1040 r
= cg_is_empty_recursive(controller
, p
);
1051 int cg_split_spec(const char *spec
, char **controller
, char **path
) {
1052 char *t
= NULL
, *u
= NULL
;
1058 if (!path_is_safe(spec
))
1066 *path
= path_kill_slashes(t
);
1075 e
= strchr(spec
, ':');
1077 if (!cg_controller_is_valid(spec
))
1094 t
= strndup(spec
, e
-spec
);
1097 if (!cg_controller_is_valid(t
)) {
1111 if (!path_is_safe(u
) ||
1112 !path_is_absolute(u
)) {
1118 path_kill_slashes(u
);
1134 int cg_mangle_path(const char *path
, char **result
) {
1135 _cleanup_free_
char *c
= NULL
, *p
= NULL
;
1142 /* First, check if it already is a filesystem path */
1143 if (path_startswith(path
, "/sys/fs/cgroup")) {
1149 *result
= path_kill_slashes(t
);
1153 /* Otherwise, treat it as cg spec */
1154 r
= cg_split_spec(path
, &c
, &p
);
1158 return cg_get_path(c
?: SYSTEMD_CGROUP_CONTROLLER
, p
?: "/", NULL
, result
);
1161 int cg_get_root_path(char **path
) {
1167 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 1, &p
);
1171 e
= endswith(p
, "/" SPECIAL_INIT_SCOPE
);
1173 e
= endswith(p
, "/" SPECIAL_SYSTEM_SLICE
); /* legacy */
1175 e
= endswith(p
, "/system"); /* even more legacy */
1183 int cg_shift_path(const char *cgroup
, const char *root
, const char **shifted
) {
1184 _cleanup_free_
char *rt
= NULL
;
1192 /* If the root was specified let's use that, otherwise
1193 * let's determine it from PID 1 */
1195 r
= cg_get_root_path(&rt
);
1202 p
= path_startswith(cgroup
, root
);
1203 if (p
&& p
> cgroup
)
1211 int cg_pid_get_path_shifted(pid_t pid
, const char *root
, char **cgroup
) {
1212 _cleanup_free_
char *raw
= NULL
;
1219 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &raw
);
1223 r
= cg_shift_path(raw
, root
, &c
);
1243 int cg_path_decode_unit(const char *cgroup
, char **unit
){
1250 n
= strcspn(cgroup
, "/");
1254 c
= strndupa(cgroup
, n
);
1257 if (!unit_name_is_valid(c
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
1268 static bool valid_slice_name(const char *p
, size_t n
) {
1273 if (n
< strlen("x.slice"))
1276 if (memcmp(p
+ n
- 6, ".slice", 6) == 0) {
1282 c
= cg_unescape(buf
);
1284 return unit_name_is_valid(c
, UNIT_NAME_PLAIN
);
1290 static const char *skip_slices(const char *p
) {
1293 /* Skips over all slice assignments */
1298 p
+= strspn(p
, "/");
1300 n
= strcspn(p
, "/");
1301 if (!valid_slice_name(p
, n
))
1308 int cg_path_get_unit(const char *path
, char **ret
) {
1316 e
= skip_slices(path
);
1318 r
= cg_path_decode_unit(e
, &unit
);
1322 /* We skipped over the slices, don't accept any now */
1323 if (endswith(unit
, ".slice")) {
1332 int cg_pid_get_unit(pid_t pid
, char **unit
) {
1333 _cleanup_free_
char *cgroup
= NULL
;
1338 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1342 return cg_path_get_unit(cgroup
, unit
);
1346 * Skip session-*.scope, but require it to be there.
1348 static const char *skip_session(const char *p
) {
1354 p
+= strspn(p
, "/");
1356 n
= strcspn(p
, "/");
1357 if (n
< strlen("session-x.scope"))
1360 if (memcmp(p
, "session-", 8) == 0 && memcmp(p
+ n
- 6, ".scope", 6) == 0) {
1361 char buf
[n
- 8 - 6 + 1];
1363 memcpy(buf
, p
+ 8, n
- 8 - 6);
1366 /* Note that session scopes never need unescaping,
1367 * since they cannot conflict with the kernel's own
1368 * names, hence we don't need to call cg_unescape()
1371 if (!session_id_valid(buf
))
1375 p
+= strspn(p
, "/");
1383 * Skip user@*.service, but require it to be there.
1385 static const char *skip_user_manager(const char *p
) {
1391 p
+= strspn(p
, "/");
1393 n
= strcspn(p
, "/");
1394 if (n
< strlen("user@x.service"))
1397 if (memcmp(p
, "user@", 5) == 0 && memcmp(p
+ n
- 8, ".service", 8) == 0) {
1398 char buf
[n
- 5 - 8 + 1];
1400 memcpy(buf
, p
+ 5, n
- 5 - 8);
1403 /* Note that user manager services never need unescaping,
1404 * since they cannot conflict with the kernel's own
1405 * names, hence we don't need to call cg_unescape()
1408 if (parse_uid(buf
, NULL
) < 0)
1412 p
+= strspn(p
, "/");
1420 static const char *skip_user_prefix(const char *path
) {
1425 /* Skip slices, if there are any */
1426 e
= skip_slices(path
);
1428 /* Skip the user manager, if it's in the path now... */
1429 t
= skip_user_manager(e
);
1433 /* Alternatively skip the user session if it is in the path... */
1434 return skip_session(e
);
1437 int cg_path_get_user_unit(const char *path
, char **ret
) {
1443 t
= skip_user_prefix(path
);
1447 /* And from here on it looks pretty much the same as for a
1448 * system unit, hence let's use the same parser from here
1450 return cg_path_get_unit(t
, ret
);
1453 int cg_pid_get_user_unit(pid_t pid
, char **unit
) {
1454 _cleanup_free_
char *cgroup
= NULL
;
1459 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1463 return cg_path_get_user_unit(cgroup
, unit
);
1466 int cg_path_get_machine_name(const char *path
, char **machine
) {
1467 _cleanup_free_
char *u
= NULL
;
1471 r
= cg_path_get_unit(path
, &u
);
1475 sl
= strjoina("/run/systemd/machines/unit:", u
);
1476 return readlink_malloc(sl
, machine
);
1479 int cg_pid_get_machine_name(pid_t pid
, char **machine
) {
1480 _cleanup_free_
char *cgroup
= NULL
;
1485 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1489 return cg_path_get_machine_name(cgroup
, machine
);
1492 int cg_path_get_session(const char *path
, char **session
) {
1493 _cleanup_free_
char *unit
= NULL
;
1499 r
= cg_path_get_unit(path
, &unit
);
1503 start
= startswith(unit
, "session-");
1506 end
= endswith(start
, ".scope");
1511 if (!session_id_valid(start
))
1527 int cg_pid_get_session(pid_t pid
, char **session
) {
1528 _cleanup_free_
char *cgroup
= NULL
;
1531 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1535 return cg_path_get_session(cgroup
, session
);
1538 int cg_path_get_owner_uid(const char *path
, uid_t
*uid
) {
1539 _cleanup_free_
char *slice
= NULL
;
1545 r
= cg_path_get_slice(path
, &slice
);
1549 start
= startswith(slice
, "user-");
1552 end
= endswith(start
, ".slice");
1557 if (parse_uid(start
, uid
) < 0)
1563 int cg_pid_get_owner_uid(pid_t pid
, uid_t
*uid
) {
1564 _cleanup_free_
char *cgroup
= NULL
;
1567 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1571 return cg_path_get_owner_uid(cgroup
, uid
);
1574 int cg_path_get_slice(const char *p
, char **slice
) {
1575 const char *e
= NULL
;
1580 /* Finds the right-most slice unit from the beginning, but
1581 * stops before we come to the first non-slice unit. */
1586 p
+= strspn(p
, "/");
1588 n
= strcspn(p
, "/");
1589 if (!valid_slice_name(p
, n
)) {
1594 s
= strdup("-.slice");
1602 return cg_path_decode_unit(e
, slice
);
1610 int cg_pid_get_slice(pid_t pid
, char **slice
) {
1611 _cleanup_free_
char *cgroup
= NULL
;
1616 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1620 return cg_path_get_slice(cgroup
, slice
);
1623 int cg_path_get_user_slice(const char *p
, char **slice
) {
1628 t
= skip_user_prefix(p
);
1632 /* And now it looks pretty much the same as for a system
1633 * slice, so let's just use the same parser from here on. */
1634 return cg_path_get_slice(t
, slice
);
1637 int cg_pid_get_user_slice(pid_t pid
, char **slice
) {
1638 _cleanup_free_
char *cgroup
= NULL
;
1643 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1647 return cg_path_get_user_slice(cgroup
, slice
);
1650 char *cg_escape(const char *p
) {
1651 bool need_prefix
= false;
1653 /* This implements very minimal escaping for names to be used
1654 * as file names in the cgroup tree: any name which might
1655 * conflict with a kernel name or is prefixed with '_' is
1656 * prefixed with a '_'. That way, when reading cgroup names it
1657 * is sufficient to remove a single prefixing underscore if
1660 /* The return value of this function (unlike cg_unescape())
1666 streq(p
, "notify_on_release") ||
1667 streq(p
, "release_agent") ||
1668 streq(p
, "tasks") ||
1669 startswith(p
, "cgroup."))
1674 dot
= strrchr(p
, '.');
1679 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1682 n
= cgroup_controller_to_string(c
);
1687 if (memcmp(p
, n
, l
) != 0)
1697 return strappend("_", p
);
1702 char *cg_unescape(const char *p
) {
1705 /* The return value of this function (unlike cg_escape())
1706 * doesn't need free()! */
1714 #define CONTROLLER_VALID \
1718 bool cg_controller_is_valid(const char *p
) {
1724 s
= startswith(p
, "name=");
1728 if (*p
== 0 || *p
== '_')
1731 for (t
= p
; *t
; t
++)
1732 if (!strchr(CONTROLLER_VALID
, *t
))
1735 if (t
- p
> FILENAME_MAX
)
1741 int cg_slice_to_path(const char *unit
, char **ret
) {
1742 _cleanup_free_
char *p
= NULL
, *s
= NULL
, *e
= NULL
;
1749 if (streq(unit
, "-.slice")) {
1759 if (!unit_name_is_valid(unit
, UNIT_NAME_PLAIN
))
1762 if (!endswith(unit
, ".slice"))
1765 r
= unit_name_to_prefix(unit
, &p
);
1769 dash
= strchr(p
, '-');
1771 /* Don't allow initial dashes */
1776 _cleanup_free_
char *escaped
= NULL
;
1777 char n
[dash
- p
+ sizeof(".slice")];
1779 /* Don't allow trailing or double dashes */
1780 if (dash
[1] == 0 || dash
[1] == '-')
1783 strcpy(stpncpy(n
, p
, dash
- p
), ".slice");
1784 if (!unit_name_is_valid(n
, UNIT_NAME_PLAIN
))
1787 escaped
= cg_escape(n
);
1791 if (!strextend(&s
, escaped
, "/", NULL
))
1794 dash
= strchr(dash
+1, '-');
1797 e
= cg_escape(unit
);
1801 if (!strextend(&s
, e
, NULL
))
1810 int cg_set_attribute(const char *controller
, const char *path
, const char *attribute
, const char *value
) {
1811 _cleanup_free_
char *p
= NULL
;
1814 r
= cg_get_path(controller
, path
, attribute
, &p
);
1818 return write_string_file(p
, value
, 0);
1821 int cg_get_attribute(const char *controller
, const char *path
, const char *attribute
, char **ret
) {
1822 _cleanup_free_
char *p
= NULL
;
1825 r
= cg_get_path(controller
, path
, attribute
, &p
);
1829 return read_one_line_file(p
, ret
);
1832 int cg_create_everywhere(CGroupMask supported
, CGroupMask mask
, const char *path
) {
1836 /* This one will create a cgroup in our private tree, but also
1837 * duplicate it in the trees specified in mask, and remove it
1840 /* First create the cgroup in our own hierarchy. */
1841 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER
, path
);
1845 /* If we are in the unified hierarchy, we are done now */
1846 unified
= cg_unified();
1852 /* Otherwise, do the same in the other hierarchies */
1853 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1854 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1857 n
= cgroup_controller_to_string(c
);
1860 (void) cg_create(n
, path
);
1861 else if (supported
& bit
)
1862 (void) cg_trim(n
, path
, true);
1868 int cg_attach_everywhere(CGroupMask supported
, const char *path
, pid_t pid
, cg_migrate_callback_t path_callback
, void *userdata
) {
1872 r
= cg_attach(SYSTEMD_CGROUP_CONTROLLER
, path
, pid
);
1876 unified
= cg_unified();
1882 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1883 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1884 const char *p
= NULL
;
1886 if (!(supported
& bit
))
1890 p
= path_callback(bit
, userdata
);
1895 (void) cg_attach_fallback(cgroup_controller_to_string(c
), p
, pid
);
1901 int cg_attach_many_everywhere(CGroupMask supported
, const char *path
, Set
* pids
, cg_migrate_callback_t path_callback
, void *userdata
) {
1906 SET_FOREACH(pidp
, pids
, i
) {
1907 pid_t pid
= PTR_TO_PID(pidp
);
1910 q
= cg_attach_everywhere(supported
, path
, pid
, path_callback
, userdata
);
1911 if (q
< 0 && r
>= 0)
1918 int cg_migrate_everywhere(CGroupMask supported
, const char *from
, const char *to
, cg_migrate_callback_t to_callback
, void *userdata
) {
1922 if (!path_equal(from
, to
)) {
1923 r
= cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER
, from
, SYSTEMD_CGROUP_CONTROLLER
, to
, false, true);
1928 unified
= cg_unified();
1934 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1935 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1936 const char *p
= NULL
;
1938 if (!(supported
& bit
))
1942 p
= to_callback(bit
, userdata
);
1947 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER
, to
, cgroup_controller_to_string(c
), p
, false, false);
1953 int cg_trim_everywhere(CGroupMask supported
, const char *path
, bool delete_root
) {
1957 r
= cg_trim(SYSTEMD_CGROUP_CONTROLLER
, path
, delete_root
);
1961 unified
= cg_unified();
1967 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1968 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1970 if (!(supported
& bit
))
1973 (void) cg_trim(cgroup_controller_to_string(c
), path
, delete_root
);
1979 int cg_mask_supported(CGroupMask
*ret
) {
1980 CGroupMask mask
= 0;
1983 /* Determines the mask of supported cgroup controllers. Only
1984 * includes controllers we can make sense of and that are
1985 * actually accessible. */
1987 unified
= cg_unified();
1991 _cleanup_free_
char *root
= NULL
, *controllers
= NULL
, *path
= NULL
;
1994 /* In the unified hierarchy we can read the supported
1995 * and accessible controllers from the top-level
1996 * cgroup attribute */
1998 r
= cg_get_root_path(&root
);
2002 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, root
, "cgroup.controllers", &path
);
2006 r
= read_one_line_file(path
, &controllers
);
2012 _cleanup_free_
char *n
= NULL
;
2015 r
= extract_first_word(&c
, &n
, NULL
, 0);
2021 v
= cgroup_controller_from_string(n
);
2025 mask
|= CGROUP_CONTROLLER_TO_MASK(v
);
2028 /* Currently, we only support the memory and pids
2029 * controllers in the unified hierarchy, mask
2030 * everything else off. */
2031 mask
&= CGROUP_MASK_MEMORY
| CGROUP_MASK_PIDS
;
2036 /* In the legacy hierarchy, we check which
2037 * hierarchies are mounted. */
2039 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2042 n
= cgroup_controller_to_string(c
);
2043 if (controller_is_accessible(n
) >= 0)
2044 mask
|= CGROUP_CONTROLLER_TO_MASK(c
);
2052 int cg_kernel_controllers(Set
*controllers
) {
2053 _cleanup_fclose_
FILE *f
= NULL
;
2057 assert(controllers
);
2059 /* Determines the full list of kernel-known controllers. Might
2060 * include controllers we don't actually support, arbitrary
2061 * named hierarchies and controllers that aren't currently
2062 * accessible (because not mounted). */
2064 f
= fopen("/proc/cgroups", "re");
2066 if (errno
== ENOENT
)
2071 /* Ignore the header line */
2072 (void) fgets(buf
, sizeof(buf
), f
);
2079 if (fscanf(f
, "%ms %*i %*i %i", &controller
, &enabled
) != 2) {
2084 if (ferror(f
) && errno
!= 0)
2095 if (!cg_controller_is_valid(controller
)) {
2100 r
= set_consume(controllers
, controller
);
2108 static thread_local
int unified_cache
= -1;
2110 int cg_unified(void) {
2113 /* Checks if we support the unified hierarchy. Returns an
2114 * error when the cgroup hierarchies aren't mounted yet or we
2115 * have any other trouble determining if the unified hierarchy
2118 if (unified_cache
>= 0)
2119 return unified_cache
;
2121 if (statfs("/sys/fs/cgroup/", &fs
) < 0)
2124 if (F_TYPE_EQUAL(fs
.f_type
, CGROUP_SUPER_MAGIC
))
2125 unified_cache
= true;
2126 else if (F_TYPE_EQUAL(fs
.f_type
, TMPFS_MAGIC
))
2127 unified_cache
= false;
2131 return unified_cache
;
2134 void cg_unified_flush(void) {
2138 int cg_enable_everywhere(CGroupMask supported
, CGroupMask mask
, const char *p
) {
2139 _cleanup_free_
char *fs
= NULL
;
2148 unified
= cg_unified();
2151 if (!unified
) /* on the legacy hierarchy there's no joining of controllers defined */
2154 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, p
, "cgroup.subtree_control", &fs
);
2158 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2159 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2162 if (!(supported
& bit
))
2165 n
= cgroup_controller_to_string(c
);
2167 char s
[1 + strlen(n
) + 1];
2169 s
[0] = mask
& bit
? '+' : '-';
2172 r
= write_string_file(fs
, s
, 0);
2174 log_debug_errno(r
, "Failed to enable controller %s for %s (%s): %m", n
, p
, fs
);
2181 bool cg_is_unified_wanted(void) {
2182 static thread_local
int wanted
= -1;
2185 /* If the hierarchy is already mounted, then follow whatever
2186 * was chosen for it. */
2187 unified
= cg_unified();
2191 /* Otherwise, let's see what the kernel command line has to
2192 * say. Since checking that is expensive, let's cache the
2197 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL
);
2199 return (wanted
= true);
2201 _cleanup_free_
char *value
= NULL
;
2203 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value
);
2207 return (wanted
= false);
2209 return (wanted
= parse_boolean(value
) > 0);
2213 bool cg_is_legacy_wanted(void) {
2214 return !cg_is_unified_wanted();
2217 int cg_cpu_shares_parse(const char *s
, uint64_t *ret
) {
2222 *ret
= CGROUP_CPU_SHARES_INVALID
;
2226 r
= safe_atou64(s
, &u
);
2230 if (u
< CGROUP_CPU_SHARES_MIN
|| u
> CGROUP_CPU_SHARES_MAX
)
2237 int cg_blkio_weight_parse(const char *s
, uint64_t *ret
) {
2242 *ret
= CGROUP_BLKIO_WEIGHT_INVALID
;
2246 r
= safe_atou64(s
, &u
);
2250 if (u
< CGROUP_BLKIO_WEIGHT_MIN
|| u
> CGROUP_BLKIO_WEIGHT_MAX
)
2257 static const char *cgroup_controller_table
[_CGROUP_CONTROLLER_MAX
] = {
2258 [CGROUP_CONTROLLER_CPU
] = "cpu",
2259 [CGROUP_CONTROLLER_CPUACCT
] = "cpuacct",
2260 [CGROUP_CONTROLLER_BLKIO
] = "blkio",
2261 [CGROUP_CONTROLLER_MEMORY
] = "memory",
2262 [CGROUP_CONTROLLER_DEVICES
] = "devices",
2263 [CGROUP_CONTROLLER_PIDS
] = "pids",
2264 [CGROUP_CONTROLLER_NET_CLS
] = "net_cls",
2267 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller
, CGroupController
);