1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
31 #include <sys/statfs.h>
32 #include <sys/types.h>
35 #include "alloc-util.h"
36 #include "cgroup-util.h"
38 #include "dirent-util.h"
39 #include "extract-word.h"
42 #include "formats-util.h"
45 #include "login-util.h"
49 #include "parse-util.h"
50 #include "path-util.h"
51 #include "proc-cmdline.h"
52 #include "process-util.h"
55 #include "stat-util.h"
56 #include "string-table.h"
57 #include "string-util.h"
58 #include "unit-name.h"
59 #include "user-util.h"
61 int cg_enumerate_processes(const char *controller
, const char *path
, FILE **_f
) {
62 _cleanup_free_
char *fs
= NULL
;
68 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
80 int cg_read_pid(FILE *f
, pid_t
*_pid
) {
83 /* Note that the cgroup.procs might contain duplicates! See
84 * cgroups.txt for details. */
90 if (fscanf(f
, "%lu", &ul
) != 1) {
95 return errno
? -errno
: -EIO
;
105 int cg_enumerate_subgroups(const char *controller
, const char *path
, DIR **_d
) {
106 _cleanup_free_
char *fs
= NULL
;
112 /* This is not recursive! */
114 r
= cg_get_path(controller
, path
, NULL
, &fs
);
126 int cg_read_subgroup(DIR *d
, char **fn
) {
132 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
135 if (de
->d_type
!= DT_DIR
)
138 if (streq(de
->d_name
, ".") ||
139 streq(de
->d_name
, ".."))
142 b
= strdup(de
->d_name
);
153 int cg_rmdir(const char *controller
, const char *path
) {
154 _cleanup_free_
char *p
= NULL
;
157 r
= cg_get_path(controller
, path
, NULL
, &p
);
162 if (r
< 0 && errno
!= ENOENT
)
168 int cg_kill(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, Set
*s
) {
169 _cleanup_set_free_ Set
*allocated_set
= NULL
;
176 /* This goes through the tasks list and kills them all. This
177 * is repeated until no further processes are added to the
178 * tasks list, to properly handle forking processes */
181 s
= allocated_set
= set_new(NULL
);
189 _cleanup_fclose_
FILE *f
= NULL
;
193 r
= cg_enumerate_processes(controller
, path
, &f
);
195 if (ret
>= 0 && r
!= -ENOENT
)
201 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
203 if (ignore_self
&& pid
== my_pid
)
206 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
209 /* If we haven't killed this process yet, kill
211 if (kill(pid
, sig
) < 0) {
212 if (ret
>= 0 && errno
!= ESRCH
)
215 if (sigcont
&& sig
!= SIGKILL
)
216 (void) kill(pid
, SIGCONT
);
224 r
= set_put(s
, PID_TO_PTR(pid
));
240 /* To avoid racing against processes which fork
241 * quicker than we can kill them we repeat this until
242 * no new pids need to be killed. */
249 int cg_kill_recursive(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, bool rem
, Set
*s
) {
250 _cleanup_set_free_ Set
*allocated_set
= NULL
;
251 _cleanup_closedir_
DIR *d
= NULL
;
259 s
= allocated_set
= set_new(NULL
);
264 ret
= cg_kill(controller
, path
, sig
, sigcont
, ignore_self
, s
);
266 r
= cg_enumerate_subgroups(controller
, path
, &d
);
268 if (ret
>= 0 && r
!= -ENOENT
)
274 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
275 _cleanup_free_
char *p
= NULL
;
277 p
= strjoin(path
, "/", fn
, NULL
);
282 r
= cg_kill_recursive(controller
, p
, sig
, sigcont
, ignore_self
, rem
, s
);
283 if (r
!= 0 && ret
>= 0)
287 if (ret
>= 0 && r
< 0)
291 r
= cg_rmdir(controller
, path
);
292 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
299 int cg_migrate(const char *cfrom
, const char *pfrom
, const char *cto
, const char *pto
, bool ignore_self
) {
301 _cleanup_set_free_ Set
*s
= NULL
;
317 _cleanup_fclose_
FILE *f
= NULL
;
321 r
= cg_enumerate_processes(cfrom
, pfrom
, &f
);
323 if (ret
>= 0 && r
!= -ENOENT
)
329 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
331 /* This might do weird stuff if we aren't a
332 * single-threaded program. However, we
333 * luckily know we are single-threaded */
334 if (ignore_self
&& pid
== my_pid
)
337 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
340 /* Ignore kernel threads. Since they can only
341 * exist in the root cgroup, we only check for
344 (isempty(pfrom
) || path_equal(pfrom
, "/")) &&
345 is_kernel_thread(pid
) > 0)
348 r
= cg_attach(cto
, pto
, pid
);
350 if (ret
>= 0 && r
!= -ESRCH
)
357 r
= set_put(s
, PID_TO_PTR(pid
));
377 int cg_migrate_recursive(
385 _cleanup_closedir_
DIR *d
= NULL
;
394 ret
= cg_migrate(cfrom
, pfrom
, cto
, pto
, ignore_self
);
396 r
= cg_enumerate_subgroups(cfrom
, pfrom
, &d
);
398 if (ret
>= 0 && r
!= -ENOENT
)
404 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
405 _cleanup_free_
char *p
= NULL
;
407 p
= strjoin(pfrom
, "/", fn
, NULL
);
412 r
= cg_migrate_recursive(cfrom
, p
, cto
, pto
, ignore_self
, rem
);
413 if (r
!= 0 && ret
>= 0)
417 if (r
< 0 && ret
>= 0)
421 r
= cg_rmdir(cfrom
, pfrom
);
422 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
429 int cg_migrate_recursive_fallback(
444 r
= cg_migrate_recursive(cfrom
, pfrom
, cto
, pto
, ignore_self
, rem
);
446 char prefix
[strlen(pto
) + 1];
448 /* This didn't work? Then let's try all prefixes of the destination */
450 PATH_FOREACH_PREFIX(prefix
, pto
) {
453 q
= cg_migrate_recursive(cfrom
, pfrom
, cto
, prefix
, ignore_self
, rem
);
462 static const char *controller_to_dirname(const char *controller
) {
467 /* Converts a controller name to the directory name below
468 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
469 * just cuts off the name= prefix used for named
470 * hierarchies, if it is specified. */
472 e
= startswith(controller
, "name=");
479 static int join_path_legacy(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
486 dn
= controller_to_dirname(controller
);
488 if (isempty(path
) && isempty(suffix
))
489 t
= strappend("/sys/fs/cgroup/", dn
);
490 else if (isempty(path
))
491 t
= strjoin("/sys/fs/cgroup/", dn
, "/", suffix
, NULL
);
492 else if (isempty(suffix
))
493 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, NULL
);
495 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, "/", suffix
, NULL
);
503 static int join_path_unified(const char *path
, const char *suffix
, char **fs
) {
508 if (isempty(path
) && isempty(suffix
))
509 t
= strdup("/sys/fs/cgroup");
510 else if (isempty(path
))
511 t
= strappend("/sys/fs/cgroup/", suffix
);
512 else if (isempty(suffix
))
513 t
= strappend("/sys/fs/cgroup/", path
);
515 t
= strjoin("/sys/fs/cgroup/", path
, "/", suffix
, NULL
);
523 int cg_get_path(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
531 /* If no controller is specified, we return the path
532 * *below* the controllers, without any prefix. */
534 if (!path
&& !suffix
)
542 t
= strjoin(path
, "/", suffix
, NULL
);
546 *fs
= path_kill_slashes(t
);
550 if (!cg_controller_is_valid(controller
))
553 unified
= cg_unified();
558 r
= join_path_unified(path
, suffix
, fs
);
560 r
= join_path_legacy(controller
, path
, suffix
, fs
);
564 path_kill_slashes(*fs
);
568 static int controller_is_accessible(const char *controller
) {
573 /* Checks whether a specific controller is accessible,
574 * i.e. its hierarchy mounted. In the unified hierarchy all
575 * controllers are considered accessible, except for the named
578 if (!cg_controller_is_valid(controller
))
581 unified
= cg_unified();
585 /* We don't support named hierarchies if we are using
586 * the unified hierarchy. */
588 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
))
591 if (startswith(controller
, "name="))
597 dn
= controller_to_dirname(controller
);
598 cc
= strjoina("/sys/fs/cgroup/", dn
);
600 if (laccess(cc
, F_OK
) < 0)
607 int cg_get_path_and_check(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
613 /* Check if the specified controller is actually accessible */
614 r
= controller_is_accessible(controller
);
618 return cg_get_path(controller
, path
, suffix
, fs
);
621 static int trim_cb(const char *path
, const struct stat
*sb
, int typeflag
, struct FTW
*ftwbuf
) {
626 if (typeflag
!= FTW_DP
)
629 if (ftwbuf
->level
< 1)
636 int cg_trim(const char *controller
, const char *path
, bool delete_root
) {
637 _cleanup_free_
char *fs
= NULL
;
642 r
= cg_get_path(controller
, path
, NULL
, &fs
);
647 if (nftw(fs
, trim_cb
, 64, FTW_DEPTH
|FTW_MOUNT
|FTW_PHYS
) != 0) {
657 if (rmdir(fs
) < 0 && errno
!= ENOENT
)
664 int cg_create(const char *controller
, const char *path
) {
665 _cleanup_free_
char *fs
= NULL
;
668 r
= cg_get_path_and_check(controller
, path
, NULL
, &fs
);
672 r
= mkdir_parents(fs
, 0755);
676 if (mkdir(fs
, 0755) < 0) {
687 int cg_create_and_attach(const char *controller
, const char *path
, pid_t pid
) {
692 r
= cg_create(controller
, path
);
696 q
= cg_attach(controller
, path
, pid
);
700 /* This does not remove the cgroup on failure */
704 int cg_attach(const char *controller
, const char *path
, pid_t pid
) {
705 _cleanup_free_
char *fs
= NULL
;
706 char c
[DECIMAL_STR_MAX(pid_t
) + 2];
712 r
= cg_get_path_and_check(controller
, path
, "cgroup.procs", &fs
);
719 snprintf(c
, sizeof(c
), PID_FMT
"\n", pid
);
721 return write_string_file(fs
, c
, 0);
724 int cg_attach_fallback(const char *controller
, const char *path
, pid_t pid
) {
731 r
= cg_attach(controller
, path
, pid
);
733 char prefix
[strlen(path
) + 1];
735 /* This didn't work? Then let's try all prefixes of
738 PATH_FOREACH_PREFIX(prefix
, path
) {
741 q
= cg_attach(controller
, prefix
, pid
);
750 int cg_set_group_access(
751 const char *controller
,
757 _cleanup_free_
char *fs
= NULL
;
760 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
763 if (mode
!= MODE_INVALID
)
766 r
= cg_get_path(controller
, path
, NULL
, &fs
);
770 return chmod_and_chown(fs
, mode
, uid
, gid
);
773 int cg_set_task_access(
774 const char *controller
,
780 _cleanup_free_
char *fs
= NULL
, *procs
= NULL
;
785 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
788 if (mode
!= MODE_INVALID
)
791 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
795 r
= chmod_and_chown(fs
, mode
, uid
, gid
);
799 unified
= cg_unified();
805 /* Compatibility, Always keep values for "tasks" in sync with
807 if (cg_get_path(controller
, path
, "tasks", &procs
) >= 0)
808 (void) chmod_and_chown(procs
, mode
, uid
, gid
);
813 int cg_pid_get_path(const char *controller
, pid_t pid
, char **path
) {
814 _cleanup_fclose_
FILE *f
= NULL
;
823 unified
= cg_unified();
828 if (!cg_controller_is_valid(controller
))
831 controller
= SYSTEMD_CGROUP_CONTROLLER
;
833 cs
= strlen(controller
);
836 fs
= procfs_file_alloca(pid
, "cgroup");
839 return errno
== ENOENT
? -ESRCH
: -errno
;
841 FOREACH_LINE(line
, f
, return -errno
) {
847 e
= startswith(line
, "0:");
857 const char *word
, *state
;
860 l
= strchr(line
, ':');
870 FOREACH_WORD_SEPARATOR(word
, k
, l
, ",", state
) {
871 if (k
== cs
&& memcmp(word
, controller
, cs
) == 0) {
892 int cg_install_release_agent(const char *controller
, const char *agent
) {
893 _cleanup_free_
char *fs
= NULL
, *contents
= NULL
;
899 unified
= cg_unified();
902 if (unified
) /* doesn't apply to unified hierarchy */
905 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
909 r
= read_one_line_file(fs
, &contents
);
913 sc
= strstrip(contents
);
915 r
= write_string_file(fs
, agent
, 0);
918 } else if (!path_equal(sc
, agent
))
922 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
926 contents
= mfree(contents
);
927 r
= read_one_line_file(fs
, &contents
);
931 sc
= strstrip(contents
);
932 if (streq(sc
, "0")) {
933 r
= write_string_file(fs
, "1", 0);
946 int cg_uninstall_release_agent(const char *controller
) {
947 _cleanup_free_
char *fs
= NULL
;
950 unified
= cg_unified();
953 if (unified
) /* Doesn't apply to unified hierarchy */
956 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
960 r
= write_string_file(fs
, "0", 0);
966 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
970 r
= write_string_file(fs
, "", 0);
977 int cg_is_empty(const char *controller
, const char *path
) {
978 _cleanup_fclose_
FILE *f
= NULL
;
984 r
= cg_enumerate_processes(controller
, path
, &f
);
990 r
= cg_read_pid(f
, &pid
);
997 int cg_is_empty_recursive(const char *controller
, const char *path
) {
1002 /* The root cgroup is always populated */
1003 if (controller
&& (isempty(path
) || path_equal(path
, "/")))
1006 unified
= cg_unified();
1011 _cleanup_free_
char *populated
= NULL
, *t
= NULL
;
1013 /* On the unified hierarchy we can check empty state
1014 * via the "cgroup.populated" attribute. */
1016 r
= cg_get_path(controller
, path
, "cgroup.populated", &populated
);
1020 r
= read_one_line_file(populated
, &t
);
1026 return streq(t
, "0");
1028 _cleanup_closedir_
DIR *d
= NULL
;
1031 r
= cg_is_empty(controller
, path
);
1035 r
= cg_enumerate_subgroups(controller
, path
, &d
);
1041 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
1042 _cleanup_free_
char *p
= NULL
;
1044 p
= strjoin(path
, "/", fn
, NULL
);
1049 r
= cg_is_empty_recursive(controller
, p
);
1060 int cg_split_spec(const char *spec
, char **controller
, char **path
) {
1061 char *t
= NULL
, *u
= NULL
;
1067 if (!path_is_safe(spec
))
1075 *path
= path_kill_slashes(t
);
1084 e
= strchr(spec
, ':');
1086 if (!cg_controller_is_valid(spec
))
1103 t
= strndup(spec
, e
-spec
);
1106 if (!cg_controller_is_valid(t
)) {
1120 if (!path_is_safe(u
) ||
1121 !path_is_absolute(u
)) {
1127 path_kill_slashes(u
);
1143 int cg_mangle_path(const char *path
, char **result
) {
1144 _cleanup_free_
char *c
= NULL
, *p
= NULL
;
1151 /* First, check if it already is a filesystem path */
1152 if (path_startswith(path
, "/sys/fs/cgroup")) {
1158 *result
= path_kill_slashes(t
);
1162 /* Otherwise, treat it as cg spec */
1163 r
= cg_split_spec(path
, &c
, &p
);
1167 return cg_get_path(c
?: SYSTEMD_CGROUP_CONTROLLER
, p
?: "/", NULL
, result
);
1170 int cg_get_root_path(char **path
) {
1176 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 1, &p
);
1180 e
= endswith(p
, "/" SPECIAL_INIT_SCOPE
);
1182 e
= endswith(p
, "/" SPECIAL_SYSTEM_SLICE
); /* legacy */
1184 e
= endswith(p
, "/system"); /* even more legacy */
1192 int cg_shift_path(const char *cgroup
, const char *root
, const char **shifted
) {
1193 _cleanup_free_
char *rt
= NULL
;
1201 /* If the root was specified let's use that, otherwise
1202 * let's determine it from PID 1 */
1204 r
= cg_get_root_path(&rt
);
1211 p
= path_startswith(cgroup
, root
);
1212 if (p
&& p
> cgroup
)
1220 int cg_pid_get_path_shifted(pid_t pid
, const char *root
, char **cgroup
) {
1221 _cleanup_free_
char *raw
= NULL
;
1228 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &raw
);
1232 r
= cg_shift_path(raw
, root
, &c
);
1252 int cg_path_decode_unit(const char *cgroup
, char **unit
){
1259 n
= strcspn(cgroup
, "/");
1263 c
= strndupa(cgroup
, n
);
1266 if (!unit_name_is_valid(c
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
1277 static bool valid_slice_name(const char *p
, size_t n
) {
1282 if (n
< strlen("x.slice"))
1285 if (memcmp(p
+ n
- 6, ".slice", 6) == 0) {
1291 c
= cg_unescape(buf
);
1293 return unit_name_is_valid(c
, UNIT_NAME_PLAIN
);
1299 static const char *skip_slices(const char *p
) {
1302 /* Skips over all slice assignments */
1307 p
+= strspn(p
, "/");
1309 n
= strcspn(p
, "/");
1310 if (!valid_slice_name(p
, n
))
1317 int cg_path_get_unit(const char *path
, char **ret
) {
1325 e
= skip_slices(path
);
1327 r
= cg_path_decode_unit(e
, &unit
);
1331 /* We skipped over the slices, don't accept any now */
1332 if (endswith(unit
, ".slice")) {
1341 int cg_pid_get_unit(pid_t pid
, char **unit
) {
1342 _cleanup_free_
char *cgroup
= NULL
;
1347 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1351 return cg_path_get_unit(cgroup
, unit
);
1355 * Skip session-*.scope, but require it to be there.
1357 static const char *skip_session(const char *p
) {
1363 p
+= strspn(p
, "/");
1365 n
= strcspn(p
, "/");
1366 if (n
< strlen("session-x.scope"))
1369 if (memcmp(p
, "session-", 8) == 0 && memcmp(p
+ n
- 6, ".scope", 6) == 0) {
1370 char buf
[n
- 8 - 6 + 1];
1372 memcpy(buf
, p
+ 8, n
- 8 - 6);
1375 /* Note that session scopes never need unescaping,
1376 * since they cannot conflict with the kernel's own
1377 * names, hence we don't need to call cg_unescape()
1380 if (!session_id_valid(buf
))
1384 p
+= strspn(p
, "/");
1392 * Skip user@*.service, but require it to be there.
1394 static const char *skip_user_manager(const char *p
) {
1400 p
+= strspn(p
, "/");
1402 n
= strcspn(p
, "/");
1403 if (n
< strlen("user@x.service"))
1406 if (memcmp(p
, "user@", 5) == 0 && memcmp(p
+ n
- 8, ".service", 8) == 0) {
1407 char buf
[n
- 5 - 8 + 1];
1409 memcpy(buf
, p
+ 5, n
- 5 - 8);
1412 /* Note that user manager services never need unescaping,
1413 * since they cannot conflict with the kernel's own
1414 * names, hence we don't need to call cg_unescape()
1417 if (parse_uid(buf
, NULL
) < 0)
1421 p
+= strspn(p
, "/");
1429 static const char *skip_user_prefix(const char *path
) {
1434 /* Skip slices, if there are any */
1435 e
= skip_slices(path
);
1437 /* Skip the user manager, if it's in the path now... */
1438 t
= skip_user_manager(e
);
1442 /* Alternatively skip the user session if it is in the path... */
1443 return skip_session(e
);
1446 int cg_path_get_user_unit(const char *path
, char **ret
) {
1452 t
= skip_user_prefix(path
);
1456 /* And from here on it looks pretty much the same as for a
1457 * system unit, hence let's use the same parser from here
1459 return cg_path_get_unit(t
, ret
);
1462 int cg_pid_get_user_unit(pid_t pid
, char **unit
) {
1463 _cleanup_free_
char *cgroup
= NULL
;
1468 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1472 return cg_path_get_user_unit(cgroup
, unit
);
1475 int cg_path_get_machine_name(const char *path
, char **machine
) {
1476 _cleanup_free_
char *u
= NULL
;
1480 r
= cg_path_get_unit(path
, &u
);
1484 sl
= strjoina("/run/systemd/machines/unit:", u
);
1485 return readlink_malloc(sl
, machine
);
1488 int cg_pid_get_machine_name(pid_t pid
, char **machine
) {
1489 _cleanup_free_
char *cgroup
= NULL
;
1494 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1498 return cg_path_get_machine_name(cgroup
, machine
);
1501 int cg_path_get_session(const char *path
, char **session
) {
1502 _cleanup_free_
char *unit
= NULL
;
1508 r
= cg_path_get_unit(path
, &unit
);
1512 start
= startswith(unit
, "session-");
1515 end
= endswith(start
, ".scope");
1520 if (!session_id_valid(start
))
1536 int cg_pid_get_session(pid_t pid
, char **session
) {
1537 _cleanup_free_
char *cgroup
= NULL
;
1540 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1544 return cg_path_get_session(cgroup
, session
);
1547 int cg_path_get_owner_uid(const char *path
, uid_t
*uid
) {
1548 _cleanup_free_
char *slice
= NULL
;
1554 r
= cg_path_get_slice(path
, &slice
);
1558 start
= startswith(slice
, "user-");
1561 end
= endswith(start
, ".slice");
1566 if (parse_uid(start
, uid
) < 0)
1572 int cg_pid_get_owner_uid(pid_t pid
, uid_t
*uid
) {
1573 _cleanup_free_
char *cgroup
= NULL
;
1576 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1580 return cg_path_get_owner_uid(cgroup
, uid
);
1583 int cg_path_get_slice(const char *p
, char **slice
) {
1584 const char *e
= NULL
;
1589 /* Finds the right-most slice unit from the beginning, but
1590 * stops before we come to the first non-slice unit. */
1595 p
+= strspn(p
, "/");
1597 n
= strcspn(p
, "/");
1598 if (!valid_slice_name(p
, n
)) {
1603 s
= strdup("-.slice");
1611 return cg_path_decode_unit(e
, slice
);
1619 int cg_pid_get_slice(pid_t pid
, char **slice
) {
1620 _cleanup_free_
char *cgroup
= NULL
;
1625 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1629 return cg_path_get_slice(cgroup
, slice
);
1632 int cg_path_get_user_slice(const char *p
, char **slice
) {
1637 t
= skip_user_prefix(p
);
1641 /* And now it looks pretty much the same as for a system
1642 * slice, so let's just use the same parser from here on. */
1643 return cg_path_get_slice(t
, slice
);
1646 int cg_pid_get_user_slice(pid_t pid
, char **slice
) {
1647 _cleanup_free_
char *cgroup
= NULL
;
1652 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1656 return cg_path_get_user_slice(cgroup
, slice
);
1659 char *cg_escape(const char *p
) {
1660 bool need_prefix
= false;
1662 /* This implements very minimal escaping for names to be used
1663 * as file names in the cgroup tree: any name which might
1664 * conflict with a kernel name or is prefixed with '_' is
1665 * prefixed with a '_'. That way, when reading cgroup names it
1666 * is sufficient to remove a single prefixing underscore if
1669 /* The return value of this function (unlike cg_unescape())
1675 streq(p
, "notify_on_release") ||
1676 streq(p
, "release_agent") ||
1677 streq(p
, "tasks") ||
1678 startswith(p
, "cgroup."))
1683 dot
= strrchr(p
, '.');
1688 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1691 n
= cgroup_controller_to_string(c
);
1696 if (memcmp(p
, n
, l
) != 0)
1706 return strappend("_", p
);
1711 char *cg_unescape(const char *p
) {
1714 /* The return value of this function (unlike cg_escape())
1715 * doesn't need free()! */
1723 #define CONTROLLER_VALID \
1727 bool cg_controller_is_valid(const char *p
) {
1733 s
= startswith(p
, "name=");
1737 if (*p
== 0 || *p
== '_')
1740 for (t
= p
; *t
; t
++)
1741 if (!strchr(CONTROLLER_VALID
, *t
))
1744 if (t
- p
> FILENAME_MAX
)
1750 int cg_slice_to_path(const char *unit
, char **ret
) {
1751 _cleanup_free_
char *p
= NULL
, *s
= NULL
, *e
= NULL
;
1758 if (streq(unit
, "-.slice")) {
1768 if (!unit_name_is_valid(unit
, UNIT_NAME_PLAIN
))
1771 if (!endswith(unit
, ".slice"))
1774 r
= unit_name_to_prefix(unit
, &p
);
1778 dash
= strchr(p
, '-');
1780 /* Don't allow initial dashes */
1785 _cleanup_free_
char *escaped
= NULL
;
1786 char n
[dash
- p
+ sizeof(".slice")];
1788 /* Don't allow trailing or double dashes */
1789 if (dash
[1] == 0 || dash
[1] == '-')
1792 strcpy(stpncpy(n
, p
, dash
- p
), ".slice");
1793 if (!unit_name_is_valid(n
, UNIT_NAME_PLAIN
))
1796 escaped
= cg_escape(n
);
1800 if (!strextend(&s
, escaped
, "/", NULL
))
1803 dash
= strchr(dash
+1, '-');
1806 e
= cg_escape(unit
);
1810 if (!strextend(&s
, e
, NULL
))
1819 int cg_set_attribute(const char *controller
, const char *path
, const char *attribute
, const char *value
) {
1820 _cleanup_free_
char *p
= NULL
;
1823 r
= cg_get_path(controller
, path
, attribute
, &p
);
1827 return write_string_file(p
, value
, 0);
1830 int cg_get_attribute(const char *controller
, const char *path
, const char *attribute
, char **ret
) {
1831 _cleanup_free_
char *p
= NULL
;
1834 r
= cg_get_path(controller
, path
, attribute
, &p
);
1838 return read_one_line_file(p
, ret
);
1841 int cg_create_everywhere(CGroupMask supported
, CGroupMask mask
, const char *path
) {
1845 /* This one will create a cgroup in our private tree, but also
1846 * duplicate it in the trees specified in mask, and remove it
1849 /* First create the cgroup in our own hierarchy. */
1850 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER
, path
);
1854 /* If we are in the unified hierarchy, we are done now */
1855 unified
= cg_unified();
1861 /* Otherwise, do the same in the other hierarchies */
1862 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1863 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1866 n
= cgroup_controller_to_string(c
);
1869 (void) cg_create(n
, path
);
1870 else if (supported
& bit
)
1871 (void) cg_trim(n
, path
, true);
1877 int cg_attach_everywhere(CGroupMask supported
, const char *path
, pid_t pid
, cg_migrate_callback_t path_callback
, void *userdata
) {
1881 r
= cg_attach(SYSTEMD_CGROUP_CONTROLLER
, path
, pid
);
1885 unified
= cg_unified();
1891 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1892 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1893 const char *p
= NULL
;
1895 if (!(supported
& bit
))
1899 p
= path_callback(bit
, userdata
);
1904 (void) cg_attach_fallback(cgroup_controller_to_string(c
), p
, pid
);
1910 int cg_attach_many_everywhere(CGroupMask supported
, const char *path
, Set
* pids
, cg_migrate_callback_t path_callback
, void *userdata
) {
1915 SET_FOREACH(pidp
, pids
, i
) {
1916 pid_t pid
= PTR_TO_PID(pidp
);
1919 q
= cg_attach_everywhere(supported
, path
, pid
, path_callback
, userdata
);
1920 if (q
< 0 && r
>= 0)
1927 int cg_migrate_everywhere(CGroupMask supported
, const char *from
, const char *to
, cg_migrate_callback_t to_callback
, void *userdata
) {
1931 if (!path_equal(from
, to
)) {
1932 r
= cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER
, from
, SYSTEMD_CGROUP_CONTROLLER
, to
, false, true);
1937 unified
= cg_unified();
1943 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1944 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1945 const char *p
= NULL
;
1947 if (!(supported
& bit
))
1951 p
= to_callback(bit
, userdata
);
1956 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER
, to
, cgroup_controller_to_string(c
), p
, false, false);
1962 int cg_trim_everywhere(CGroupMask supported
, const char *path
, bool delete_root
) {
1966 r
= cg_trim(SYSTEMD_CGROUP_CONTROLLER
, path
, delete_root
);
1970 unified
= cg_unified();
1976 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1977 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1979 if (!(supported
& bit
))
1982 (void) cg_trim(cgroup_controller_to_string(c
), path
, delete_root
);
1988 int cg_mask_supported(CGroupMask
*ret
) {
1989 CGroupMask mask
= 0;
1992 /* Determines the mask of supported cgroup controllers. Only
1993 * includes controllers we can make sense of and that are
1994 * actually accessible. */
1996 unified
= cg_unified();
2000 _cleanup_free_
char *root
= NULL
, *controllers
= NULL
, *path
= NULL
;
2003 /* In the unified hierarchy we can read the supported
2004 * and accessible controllers from the top-level
2005 * cgroup attribute */
2007 r
= cg_get_root_path(&root
);
2011 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, root
, "cgroup.controllers", &path
);
2015 r
= read_one_line_file(path
, &controllers
);
2021 _cleanup_free_
char *n
= NULL
;
2024 r
= extract_first_word(&c
, &n
, NULL
, 0);
2030 v
= cgroup_controller_from_string(n
);
2034 mask
|= CGROUP_CONTROLLER_TO_MASK(v
);
2037 /* Currently, we only support the memory and pids
2038 * controller in the unified hierarchy, mask
2039 * everything else off. */
2040 mask
&= CGROUP_MASK_MEMORY
| CGROUP_MASK_PIDS
;
2045 /* In the legacy hierarchy, we check which
2046 * hierarchies are mounted. */
2048 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2051 n
= cgroup_controller_to_string(c
);
2052 if (controller_is_accessible(n
) >= 0)
2053 mask
|= CGROUP_CONTROLLER_TO_MASK(c
);
2061 int cg_kernel_controllers(Set
*controllers
) {
2062 _cleanup_fclose_
FILE *f
= NULL
;
2066 assert(controllers
);
2068 /* Determines the full list of kernel-known controllers. Might
2069 * include controllers we don't actually support, arbitrary
2070 * named hierarchies and controllers that aren't currently
2071 * accessible (because not mounted). */
2073 f
= fopen("/proc/cgroups", "re");
2075 if (errno
== ENOENT
)
2080 /* Ignore the header line */
2081 (void) fgets(buf
, sizeof(buf
), f
);
2088 if (fscanf(f
, "%ms %*i %*i %i", &controller
, &enabled
) != 2) {
2093 if (ferror(f
) && errno
!= 0)
2104 if (!cg_controller_is_valid(controller
)) {
2109 r
= set_consume(controllers
, controller
);
2117 static thread_local
int unified_cache
= -1;
2119 int cg_unified(void) {
2122 /* Checks if we support the unified hierarchy. Returns an
2123 * error when the cgroup hierarchies aren't mounted yet or we
2124 * have any other trouble determining if the unified hierarchy
2127 if (unified_cache
>= 0)
2128 return unified_cache
;
2130 if (statfs("/sys/fs/cgroup/", &fs
) < 0)
2133 if (F_TYPE_EQUAL(fs
.f_type
, CGROUP_SUPER_MAGIC
))
2134 unified_cache
= true;
2135 else if (F_TYPE_EQUAL(fs
.f_type
, TMPFS_MAGIC
))
2136 unified_cache
= false;
2140 return unified_cache
;
2143 void cg_unified_flush(void) {
2147 int cg_enable_everywhere(CGroupMask supported
, CGroupMask mask
, const char *p
) {
2148 _cleanup_free_
char *fs
= NULL
;
2157 unified
= cg_unified();
2160 if (!unified
) /* on the legacy hiearchy there's no joining of controllers defined */
2163 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, p
, "cgroup.subtree_control", &fs
);
2167 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2168 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2171 if (!(supported
& bit
))
2174 n
= cgroup_controller_to_string(c
);
2176 char s
[1 + strlen(n
) + 1];
2178 s
[0] = mask
& bit
? '+' : '-';
2181 r
= write_string_file(fs
, s
, 0);
2183 log_debug_errno(r
, "Failed to enable controller %s for %s (%s): %m", n
, p
, fs
);
2190 bool cg_is_unified_wanted(void) {
2191 static thread_local
int wanted
= -1;
2194 /* If the hierarchy is already mounted, then follow whatever
2195 * was chosen for it. */
2196 unified
= cg_unified();
2200 /* Otherwise, let's see what the kernel command line has to
2201 * say. Since checking that is expensive, let's cache the
2206 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL
);
2208 return (wanted
= true);
2210 _cleanup_free_
char *value
= NULL
;
2212 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value
);
2216 return (wanted
= false);
2218 return (wanted
= parse_boolean(value
) > 0);
2222 bool cg_is_legacy_wanted(void) {
2223 return !cg_is_unified_wanted();
2226 int cg_cpu_shares_parse(const char *s
, uint64_t *ret
) {
2231 *ret
= CGROUP_CPU_SHARES_INVALID
;
2235 r
= safe_atou64(s
, &u
);
2239 if (u
< CGROUP_CPU_SHARES_MIN
|| u
> CGROUP_CPU_SHARES_MAX
)
2246 int cg_blkio_weight_parse(const char *s
, uint64_t *ret
) {
2251 *ret
= CGROUP_BLKIO_WEIGHT_INVALID
;
2255 r
= safe_atou64(s
, &u
);
2259 if (u
< CGROUP_BLKIO_WEIGHT_MIN
|| u
> CGROUP_BLKIO_WEIGHT_MAX
)
2266 static const char *cgroup_controller_table
[_CGROUP_CONTROLLER_MAX
] = {
2267 [CGROUP_CONTROLLER_CPU
] = "cpu",
2268 [CGROUP_CONTROLLER_CPUACCT
] = "cpuacct",
2269 [CGROUP_CONTROLLER_BLKIO
] = "blkio",
2270 [CGROUP_CONTROLLER_MEMORY
] = "memory",
2271 [CGROUP_CONTROLLER_DEVICES
] = "devices",
2272 [CGROUP_CONTROLLER_PIDS
] = "pids",
2273 [CGROUP_CONTROLLER_NET_CLS
] = "net_cls",
2276 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller
, CGroupController
);