1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
33 #include "dirent-util.h"
34 #include "extract-word.h"
37 #include "formats-util.h"
38 #include "login-util.h"
41 #include "parse-util.h"
42 #include "path-util.h"
43 #include "process-util.h"
46 #include "string-util.h"
47 #include "unit-name.h"
48 #include "user-util.h"
51 int cg_enumerate_processes(const char *controller
, const char *path
, FILE **_f
) {
52 _cleanup_free_
char *fs
= NULL
;
58 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
70 int cg_read_pid(FILE *f
, pid_t
*_pid
) {
73 /* Note that the cgroup.procs might contain duplicates! See
74 * cgroups.txt for details. */
80 if (fscanf(f
, "%lu", &ul
) != 1) {
85 return errno
? -errno
: -EIO
;
95 int cg_enumerate_subgroups(const char *controller
, const char *path
, DIR **_d
) {
96 _cleanup_free_
char *fs
= NULL
;
102 /* This is not recursive! */
104 r
= cg_get_path(controller
, path
, NULL
, &fs
);
116 int cg_read_subgroup(DIR *d
, char **fn
) {
122 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
125 if (de
->d_type
!= DT_DIR
)
128 if (streq(de
->d_name
, ".") ||
129 streq(de
->d_name
, ".."))
132 b
= strdup(de
->d_name
);
143 int cg_rmdir(const char *controller
, const char *path
) {
144 _cleanup_free_
char *p
= NULL
;
147 r
= cg_get_path(controller
, path
, NULL
, &p
);
152 if (r
< 0 && errno
!= ENOENT
)
158 int cg_kill(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, Set
*s
) {
159 _cleanup_set_free_ Set
*allocated_set
= NULL
;
166 /* This goes through the tasks list and kills them all. This
167 * is repeated until no further processes are added to the
168 * tasks list, to properly handle forking processes */
171 s
= allocated_set
= set_new(NULL
);
179 _cleanup_fclose_
FILE *f
= NULL
;
183 r
= cg_enumerate_processes(controller
, path
, &f
);
185 if (ret
>= 0 && r
!= -ENOENT
)
191 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
193 if (ignore_self
&& pid
== my_pid
)
196 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
199 /* If we haven't killed this process yet, kill
201 if (kill(pid
, sig
) < 0) {
202 if (ret
>= 0 && errno
!= ESRCH
)
205 if (sigcont
&& sig
!= SIGKILL
)
206 (void) kill(pid
, SIGCONT
);
214 r
= set_put(s
, PID_TO_PTR(pid
));
230 /* To avoid racing against processes which fork
231 * quicker than we can kill them we repeat this until
232 * no new pids need to be killed. */
239 int cg_kill_recursive(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, bool rem
, Set
*s
) {
240 _cleanup_set_free_ Set
*allocated_set
= NULL
;
241 _cleanup_closedir_
DIR *d
= NULL
;
249 s
= allocated_set
= set_new(NULL
);
254 ret
= cg_kill(controller
, path
, sig
, sigcont
, ignore_self
, s
);
256 r
= cg_enumerate_subgroups(controller
, path
, &d
);
258 if (ret
>= 0 && r
!= -ENOENT
)
264 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
265 _cleanup_free_
char *p
= NULL
;
267 p
= strjoin(path
, "/", fn
, NULL
);
272 r
= cg_kill_recursive(controller
, p
, sig
, sigcont
, ignore_self
, rem
, s
);
273 if (r
!= 0 && ret
>= 0)
277 if (ret
>= 0 && r
< 0)
281 r
= cg_rmdir(controller
, path
);
282 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
289 int cg_migrate(const char *cfrom
, const char *pfrom
, const char *cto
, const char *pto
, bool ignore_self
) {
291 _cleanup_set_free_ Set
*s
= NULL
;
307 _cleanup_fclose_
FILE *f
= NULL
;
311 r
= cg_enumerate_processes(cfrom
, pfrom
, &f
);
313 if (ret
>= 0 && r
!= -ENOENT
)
319 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
321 /* This might do weird stuff if we aren't a
322 * single-threaded program. However, we
323 * luckily know we are single-threaded */
324 if (ignore_self
&& pid
== my_pid
)
327 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
330 /* Ignore kernel threads. Since they can only
331 * exist in the root cgroup, we only check for
334 (isempty(pfrom
) || path_equal(pfrom
, "/")) &&
335 is_kernel_thread(pid
) > 0)
338 r
= cg_attach(cto
, pto
, pid
);
340 if (ret
>= 0 && r
!= -ESRCH
)
347 r
= set_put(s
, PID_TO_PTR(pid
));
367 int cg_migrate_recursive(
375 _cleanup_closedir_
DIR *d
= NULL
;
384 ret
= cg_migrate(cfrom
, pfrom
, cto
, pto
, ignore_self
);
386 r
= cg_enumerate_subgroups(cfrom
, pfrom
, &d
);
388 if (ret
>= 0 && r
!= -ENOENT
)
394 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
395 _cleanup_free_
char *p
= NULL
;
397 p
= strjoin(pfrom
, "/", fn
, NULL
);
402 r
= cg_migrate_recursive(cfrom
, p
, cto
, pto
, ignore_self
, rem
);
403 if (r
!= 0 && ret
>= 0)
407 if (r
< 0 && ret
>= 0)
411 r
= cg_rmdir(cfrom
, pfrom
);
412 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
419 int cg_migrate_recursive_fallback(
434 r
= cg_migrate_recursive(cfrom
, pfrom
, cto
, pto
, ignore_self
, rem
);
436 char prefix
[strlen(pto
) + 1];
438 /* This didn't work? Then let's try all prefixes of the destination */
440 PATH_FOREACH_PREFIX(prefix
, pto
) {
443 q
= cg_migrate_recursive(cfrom
, pfrom
, cto
, prefix
, ignore_self
, rem
);
452 static const char *controller_to_dirname(const char *controller
) {
457 /* Converts a controller name to the directory name below
458 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
459 * just cuts off the name= prefix used for named
460 * hierarchies, if it is specified. */
462 e
= startswith(controller
, "name=");
469 static int join_path_legacy(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
476 dn
= controller_to_dirname(controller
);
478 if (isempty(path
) && isempty(suffix
))
479 t
= strappend("/sys/fs/cgroup/", dn
);
480 else if (isempty(path
))
481 t
= strjoin("/sys/fs/cgroup/", dn
, "/", suffix
, NULL
);
482 else if (isempty(suffix
))
483 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, NULL
);
485 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, "/", suffix
, NULL
);
493 static int join_path_unified(const char *path
, const char *suffix
, char **fs
) {
498 if (isempty(path
) && isempty(suffix
))
499 t
= strdup("/sys/fs/cgroup");
500 else if (isempty(path
))
501 t
= strappend("/sys/fs/cgroup/", suffix
);
502 else if (isempty(suffix
))
503 t
= strappend("/sys/fs/cgroup/", path
);
505 t
= strjoin("/sys/fs/cgroup/", path
, "/", suffix
, NULL
);
513 int cg_get_path(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
521 /* If no controller is specified, we return the path
522 * *below* the controllers, without any prefix. */
524 if (!path
&& !suffix
)
532 t
= strjoin(path
, "/", suffix
, NULL
);
536 *fs
= path_kill_slashes(t
);
540 if (!cg_controller_is_valid(controller
))
543 unified
= cg_unified();
548 r
= join_path_unified(path
, suffix
, fs
);
550 r
= join_path_legacy(controller
, path
, suffix
, fs
);
554 path_kill_slashes(*fs
);
558 static int controller_is_accessible(const char *controller
) {
563 /* Checks whether a specific controller is accessible,
564 * i.e. its hierarchy mounted. In the unified hierarchy all
565 * controllers are considered accessible, except for the named
568 if (!cg_controller_is_valid(controller
))
571 unified
= cg_unified();
575 /* We don't support named hierarchies if we are using
576 * the unified hierarchy. */
578 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
))
581 if (startswith(controller
, "name="))
587 dn
= controller_to_dirname(controller
);
588 cc
= strjoina("/sys/fs/cgroup/", dn
);
590 if (laccess(cc
, F_OK
) < 0)
597 int cg_get_path_and_check(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
603 /* Check if the specified controller is actually accessible */
604 r
= controller_is_accessible(controller
);
608 return cg_get_path(controller
, path
, suffix
, fs
);
611 static int trim_cb(const char *path
, const struct stat
*sb
, int typeflag
, struct FTW
*ftwbuf
) {
616 if (typeflag
!= FTW_DP
)
619 if (ftwbuf
->level
< 1)
626 int cg_trim(const char *controller
, const char *path
, bool delete_root
) {
627 _cleanup_free_
char *fs
= NULL
;
632 r
= cg_get_path(controller
, path
, NULL
, &fs
);
637 if (nftw(fs
, trim_cb
, 64, FTW_DEPTH
|FTW_MOUNT
|FTW_PHYS
) != 0) {
647 if (rmdir(fs
) < 0 && errno
!= ENOENT
)
654 int cg_create(const char *controller
, const char *path
) {
655 _cleanup_free_
char *fs
= NULL
;
658 r
= cg_get_path_and_check(controller
, path
, NULL
, &fs
);
662 r
= mkdir_parents(fs
, 0755);
666 if (mkdir(fs
, 0755) < 0) {
677 int cg_create_and_attach(const char *controller
, const char *path
, pid_t pid
) {
682 r
= cg_create(controller
, path
);
686 q
= cg_attach(controller
, path
, pid
);
690 /* This does not remove the cgroup on failure */
694 int cg_attach(const char *controller
, const char *path
, pid_t pid
) {
695 _cleanup_free_
char *fs
= NULL
;
696 char c
[DECIMAL_STR_MAX(pid_t
) + 2];
702 r
= cg_get_path_and_check(controller
, path
, "cgroup.procs", &fs
);
709 snprintf(c
, sizeof(c
), PID_FMT
"\n", pid
);
711 return write_string_file(fs
, c
, 0);
714 int cg_attach_fallback(const char *controller
, const char *path
, pid_t pid
) {
721 r
= cg_attach(controller
, path
, pid
);
723 char prefix
[strlen(path
) + 1];
725 /* This didn't work? Then let's try all prefixes of
728 PATH_FOREACH_PREFIX(prefix
, path
) {
731 q
= cg_attach(controller
, prefix
, pid
);
740 int cg_set_group_access(
741 const char *controller
,
747 _cleanup_free_
char *fs
= NULL
;
750 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
753 if (mode
!= MODE_INVALID
)
756 r
= cg_get_path(controller
, path
, NULL
, &fs
);
760 return chmod_and_chown(fs
, mode
, uid
, gid
);
763 int cg_set_task_access(
764 const char *controller
,
770 _cleanup_free_
char *fs
= NULL
, *procs
= NULL
;
775 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
778 if (mode
!= MODE_INVALID
)
781 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
785 r
= chmod_and_chown(fs
, mode
, uid
, gid
);
789 unified
= cg_unified();
795 /* Compatibility, Always keep values for "tasks" in sync with
797 if (cg_get_path(controller
, path
, "tasks", &procs
) >= 0)
798 (void) chmod_and_chown(procs
, mode
, uid
, gid
);
803 int cg_pid_get_path(const char *controller
, pid_t pid
, char **path
) {
804 _cleanup_fclose_
FILE *f
= NULL
;
813 unified
= cg_unified();
818 if (!cg_controller_is_valid(controller
))
821 controller
= SYSTEMD_CGROUP_CONTROLLER
;
823 cs
= strlen(controller
);
826 fs
= procfs_file_alloca(pid
, "cgroup");
829 return errno
== ENOENT
? -ESRCH
: -errno
;
831 FOREACH_LINE(line
, f
, return -errno
) {
837 e
= startswith(line
, "0:");
847 const char *word
, *state
;
850 l
= strchr(line
, ':');
860 FOREACH_WORD_SEPARATOR(word
, k
, l
, ",", state
) {
861 if (k
== cs
&& memcmp(word
, controller
, cs
) == 0) {
882 int cg_install_release_agent(const char *controller
, const char *agent
) {
883 _cleanup_free_
char *fs
= NULL
, *contents
= NULL
;
889 unified
= cg_unified();
892 if (unified
) /* doesn't apply to unified hierarchy */
895 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
899 r
= read_one_line_file(fs
, &contents
);
903 sc
= strstrip(contents
);
905 r
= write_string_file(fs
, agent
, 0);
908 } else if (!path_equal(sc
, agent
))
912 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
916 contents
= mfree(contents
);
917 r
= read_one_line_file(fs
, &contents
);
921 sc
= strstrip(contents
);
922 if (streq(sc
, "0")) {
923 r
= write_string_file(fs
, "1", 0);
936 int cg_uninstall_release_agent(const char *controller
) {
937 _cleanup_free_
char *fs
= NULL
;
940 unified
= cg_unified();
943 if (unified
) /* Doesn't apply to unified hierarchy */
946 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
950 r
= write_string_file(fs
, "0", 0);
956 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
960 r
= write_string_file(fs
, "", 0);
967 int cg_is_empty(const char *controller
, const char *path
) {
968 _cleanup_fclose_
FILE *f
= NULL
;
974 r
= cg_enumerate_processes(controller
, path
, &f
);
980 r
= cg_read_pid(f
, &pid
);
987 int cg_is_empty_recursive(const char *controller
, const char *path
) {
992 /* The root cgroup is always populated */
993 if (controller
&& (isempty(path
) || path_equal(path
, "/")))
996 unified
= cg_unified();
1001 _cleanup_free_
char *populated
= NULL
, *t
= NULL
;
1003 /* On the unified hierarchy we can check empty state
1004 * via the "cgroup.populated" attribute. */
1006 r
= cg_get_path(controller
, path
, "cgroup.populated", &populated
);
1010 r
= read_one_line_file(populated
, &t
);
1016 return streq(t
, "0");
1018 _cleanup_closedir_
DIR *d
= NULL
;
1021 r
= cg_is_empty(controller
, path
);
1025 r
= cg_enumerate_subgroups(controller
, path
, &d
);
1031 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
1032 _cleanup_free_
char *p
= NULL
;
1034 p
= strjoin(path
, "/", fn
, NULL
);
1039 r
= cg_is_empty_recursive(controller
, p
);
1050 int cg_split_spec(const char *spec
, char **controller
, char **path
) {
1051 char *t
= NULL
, *u
= NULL
;
1057 if (!path_is_safe(spec
))
1065 *path
= path_kill_slashes(t
);
1074 e
= strchr(spec
, ':');
1076 if (!cg_controller_is_valid(spec
))
1093 t
= strndup(spec
, e
-spec
);
1096 if (!cg_controller_is_valid(t
)) {
1110 if (!path_is_safe(u
) ||
1111 !path_is_absolute(u
)) {
1117 path_kill_slashes(u
);
1133 int cg_mangle_path(const char *path
, char **result
) {
1134 _cleanup_free_
char *c
= NULL
, *p
= NULL
;
1141 /* First, check if it already is a filesystem path */
1142 if (path_startswith(path
, "/sys/fs/cgroup")) {
1148 *result
= path_kill_slashes(t
);
1152 /* Otherwise, treat it as cg spec */
1153 r
= cg_split_spec(path
, &c
, &p
);
1157 return cg_get_path(c
?: SYSTEMD_CGROUP_CONTROLLER
, p
?: "/", NULL
, result
);
1160 int cg_get_root_path(char **path
) {
1166 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 1, &p
);
1170 e
= endswith(p
, "/" SPECIAL_INIT_SCOPE
);
1172 e
= endswith(p
, "/" SPECIAL_SYSTEM_SLICE
); /* legacy */
1174 e
= endswith(p
, "/system"); /* even more legacy */
1182 int cg_shift_path(const char *cgroup
, const char *root
, const char **shifted
) {
1183 _cleanup_free_
char *rt
= NULL
;
1191 /* If the root was specified let's use that, otherwise
1192 * let's determine it from PID 1 */
1194 r
= cg_get_root_path(&rt
);
1201 p
= path_startswith(cgroup
, root
);
1202 if (p
&& p
> cgroup
)
1210 int cg_pid_get_path_shifted(pid_t pid
, const char *root
, char **cgroup
) {
1211 _cleanup_free_
char *raw
= NULL
;
1218 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &raw
);
1222 r
= cg_shift_path(raw
, root
, &c
);
1242 int cg_path_decode_unit(const char *cgroup
, char **unit
){
1249 n
= strcspn(cgroup
, "/");
1253 c
= strndupa(cgroup
, n
);
1256 if (!unit_name_is_valid(c
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
1267 static bool valid_slice_name(const char *p
, size_t n
) {
1272 if (n
< strlen("x.slice"))
1275 if (memcmp(p
+ n
- 6, ".slice", 6) == 0) {
1281 c
= cg_unescape(buf
);
1283 return unit_name_is_valid(c
, UNIT_NAME_PLAIN
);
1289 static const char *skip_slices(const char *p
) {
1292 /* Skips over all slice assignments */
1297 p
+= strspn(p
, "/");
1299 n
= strcspn(p
, "/");
1300 if (!valid_slice_name(p
, n
))
1307 int cg_path_get_unit(const char *path
, char **ret
) {
1315 e
= skip_slices(path
);
1317 r
= cg_path_decode_unit(e
, &unit
);
1321 /* We skipped over the slices, don't accept any now */
1322 if (endswith(unit
, ".slice")) {
1331 int cg_pid_get_unit(pid_t pid
, char **unit
) {
1332 _cleanup_free_
char *cgroup
= NULL
;
1337 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1341 return cg_path_get_unit(cgroup
, unit
);
1345 * Skip session-*.scope, but require it to be there.
1347 static const char *skip_session(const char *p
) {
1353 p
+= strspn(p
, "/");
1355 n
= strcspn(p
, "/");
1356 if (n
< strlen("session-x.scope"))
1359 if (memcmp(p
, "session-", 8) == 0 && memcmp(p
+ n
- 6, ".scope", 6) == 0) {
1360 char buf
[n
- 8 - 6 + 1];
1362 memcpy(buf
, p
+ 8, n
- 8 - 6);
1365 /* Note that session scopes never need unescaping,
1366 * since they cannot conflict with the kernel's own
1367 * names, hence we don't need to call cg_unescape()
1370 if (!session_id_valid(buf
))
1374 p
+= strspn(p
, "/");
1382 * Skip user@*.service, but require it to be there.
1384 static const char *skip_user_manager(const char *p
) {
1390 p
+= strspn(p
, "/");
1392 n
= strcspn(p
, "/");
1393 if (n
< strlen("user@x.service"))
1396 if (memcmp(p
, "user@", 5) == 0 && memcmp(p
+ n
- 8, ".service", 8) == 0) {
1397 char buf
[n
- 5 - 8 + 1];
1399 memcpy(buf
, p
+ 5, n
- 5 - 8);
1402 /* Note that user manager services never need unescaping,
1403 * since they cannot conflict with the kernel's own
1404 * names, hence we don't need to call cg_unescape()
1407 if (parse_uid(buf
, NULL
) < 0)
1411 p
+= strspn(p
, "/");
1419 static const char *skip_user_prefix(const char *path
) {
1424 /* Skip slices, if there are any */
1425 e
= skip_slices(path
);
1427 /* Skip the user manager, if it's in the path now... */
1428 t
= skip_user_manager(e
);
1432 /* Alternatively skip the user session if it is in the path... */
1433 return skip_session(e
);
1436 int cg_path_get_user_unit(const char *path
, char **ret
) {
1442 t
= skip_user_prefix(path
);
1446 /* And from here on it looks pretty much the same as for a
1447 * system unit, hence let's use the same parser from here
1449 return cg_path_get_unit(t
, ret
);
1452 int cg_pid_get_user_unit(pid_t pid
, char **unit
) {
1453 _cleanup_free_
char *cgroup
= NULL
;
1458 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1462 return cg_path_get_user_unit(cgroup
, unit
);
1465 int cg_path_get_machine_name(const char *path
, char **machine
) {
1466 _cleanup_free_
char *u
= NULL
;
1470 r
= cg_path_get_unit(path
, &u
);
1474 sl
= strjoina("/run/systemd/machines/unit:", u
);
1475 return readlink_malloc(sl
, machine
);
1478 int cg_pid_get_machine_name(pid_t pid
, char **machine
) {
1479 _cleanup_free_
char *cgroup
= NULL
;
1484 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1488 return cg_path_get_machine_name(cgroup
, machine
);
1491 int cg_path_get_session(const char *path
, char **session
) {
1492 _cleanup_free_
char *unit
= NULL
;
1498 r
= cg_path_get_unit(path
, &unit
);
1502 start
= startswith(unit
, "session-");
1505 end
= endswith(start
, ".scope");
1510 if (!session_id_valid(start
))
1526 int cg_pid_get_session(pid_t pid
, char **session
) {
1527 _cleanup_free_
char *cgroup
= NULL
;
1530 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1534 return cg_path_get_session(cgroup
, session
);
1537 int cg_path_get_owner_uid(const char *path
, uid_t
*uid
) {
1538 _cleanup_free_
char *slice
= NULL
;
1544 r
= cg_path_get_slice(path
, &slice
);
1548 start
= startswith(slice
, "user-");
1551 end
= endswith(start
, ".slice");
1556 if (parse_uid(start
, uid
) < 0)
1562 int cg_pid_get_owner_uid(pid_t pid
, uid_t
*uid
) {
1563 _cleanup_free_
char *cgroup
= NULL
;
1566 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1570 return cg_path_get_owner_uid(cgroup
, uid
);
1573 int cg_path_get_slice(const char *p
, char **slice
) {
1574 const char *e
= NULL
;
1579 /* Finds the right-most slice unit from the beginning, but
1580 * stops before we come to the first non-slice unit. */
1585 p
+= strspn(p
, "/");
1587 n
= strcspn(p
, "/");
1588 if (!valid_slice_name(p
, n
)) {
1593 s
= strdup("-.slice");
1601 return cg_path_decode_unit(e
, slice
);
1609 int cg_pid_get_slice(pid_t pid
, char **slice
) {
1610 _cleanup_free_
char *cgroup
= NULL
;
1615 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1619 return cg_path_get_slice(cgroup
, slice
);
1622 int cg_path_get_user_slice(const char *p
, char **slice
) {
1627 t
= skip_user_prefix(p
);
1631 /* And now it looks pretty much the same as for a system
1632 * slice, so let's just use the same parser from here on. */
1633 return cg_path_get_slice(t
, slice
);
1636 int cg_pid_get_user_slice(pid_t pid
, char **slice
) {
1637 _cleanup_free_
char *cgroup
= NULL
;
1642 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1646 return cg_path_get_user_slice(cgroup
, slice
);
1649 char *cg_escape(const char *p
) {
1650 bool need_prefix
= false;
1652 /* This implements very minimal escaping for names to be used
1653 * as file names in the cgroup tree: any name which might
1654 * conflict with a kernel name or is prefixed with '_' is
1655 * prefixed with a '_'. That way, when reading cgroup names it
1656 * is sufficient to remove a single prefixing underscore if
1659 /* The return value of this function (unlike cg_unescape())
1665 streq(p
, "notify_on_release") ||
1666 streq(p
, "release_agent") ||
1667 streq(p
, "tasks") ||
1668 startswith(p
, "cgroup."))
1673 dot
= strrchr(p
, '.');
1678 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1681 n
= cgroup_controller_to_string(c
);
1686 if (memcmp(p
, n
, l
) != 0)
1696 return strappend("_", p
);
1701 char *cg_unescape(const char *p
) {
1704 /* The return value of this function (unlike cg_escape())
1705 * doesn't need free()! */
1713 #define CONTROLLER_VALID \
1717 bool cg_controller_is_valid(const char *p
) {
1723 s
= startswith(p
, "name=");
1727 if (*p
== 0 || *p
== '_')
1730 for (t
= p
; *t
; t
++)
1731 if (!strchr(CONTROLLER_VALID
, *t
))
1734 if (t
- p
> FILENAME_MAX
)
1740 int cg_slice_to_path(const char *unit
, char **ret
) {
1741 _cleanup_free_
char *p
= NULL
, *s
= NULL
, *e
= NULL
;
1748 if (streq(unit
, "-.slice")) {
1758 if (!unit_name_is_valid(unit
, UNIT_NAME_PLAIN
))
1761 if (!endswith(unit
, ".slice"))
1764 r
= unit_name_to_prefix(unit
, &p
);
1768 dash
= strchr(p
, '-');
1770 /* Don't allow initial dashes */
1775 _cleanup_free_
char *escaped
= NULL
;
1776 char n
[dash
- p
+ sizeof(".slice")];
1778 /* Don't allow trailing or double dashes */
1779 if (dash
[1] == 0 || dash
[1] == '-')
1782 strcpy(stpncpy(n
, p
, dash
- p
), ".slice");
1783 if (!unit_name_is_valid(n
, UNIT_NAME_PLAIN
))
1786 escaped
= cg_escape(n
);
1790 if (!strextend(&s
, escaped
, "/", NULL
))
1793 dash
= strchr(dash
+1, '-');
1796 e
= cg_escape(unit
);
1800 if (!strextend(&s
, e
, NULL
))
1809 int cg_set_attribute(const char *controller
, const char *path
, const char *attribute
, const char *value
) {
1810 _cleanup_free_
char *p
= NULL
;
1813 r
= cg_get_path(controller
, path
, attribute
, &p
);
1817 return write_string_file(p
, value
, 0);
1820 int cg_get_attribute(const char *controller
, const char *path
, const char *attribute
, char **ret
) {
1821 _cleanup_free_
char *p
= NULL
;
1824 r
= cg_get_path(controller
, path
, attribute
, &p
);
1828 return read_one_line_file(p
, ret
);
1831 int cg_create_everywhere(CGroupMask supported
, CGroupMask mask
, const char *path
) {
1835 /* This one will create a cgroup in our private tree, but also
1836 * duplicate it in the trees specified in mask, and remove it
1839 /* First create the cgroup in our own hierarchy. */
1840 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER
, path
);
1844 /* If we are in the unified hierarchy, we are done now */
1845 unified
= cg_unified();
1851 /* Otherwise, do the same in the other hierarchies */
1852 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1853 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1856 n
= cgroup_controller_to_string(c
);
1859 (void) cg_create(n
, path
);
1860 else if (supported
& bit
)
1861 (void) cg_trim(n
, path
, true);
1867 int cg_attach_everywhere(CGroupMask supported
, const char *path
, pid_t pid
, cg_migrate_callback_t path_callback
, void *userdata
) {
1871 r
= cg_attach(SYSTEMD_CGROUP_CONTROLLER
, path
, pid
);
1875 unified
= cg_unified();
1881 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1882 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1883 const char *p
= NULL
;
1885 if (!(supported
& bit
))
1889 p
= path_callback(bit
, userdata
);
1894 (void) cg_attach_fallback(cgroup_controller_to_string(c
), p
, pid
);
1900 int cg_attach_many_everywhere(CGroupMask supported
, const char *path
, Set
* pids
, cg_migrate_callback_t path_callback
, void *userdata
) {
1905 SET_FOREACH(pidp
, pids
, i
) {
1906 pid_t pid
= PTR_TO_PID(pidp
);
1909 q
= cg_attach_everywhere(supported
, path
, pid
, path_callback
, userdata
);
1910 if (q
< 0 && r
>= 0)
1917 int cg_migrate_everywhere(CGroupMask supported
, const char *from
, const char *to
, cg_migrate_callback_t to_callback
, void *userdata
) {
1921 if (!path_equal(from
, to
)) {
1922 r
= cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER
, from
, SYSTEMD_CGROUP_CONTROLLER
, to
, false, true);
1927 unified
= cg_unified();
1933 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1934 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1935 const char *p
= NULL
;
1937 if (!(supported
& bit
))
1941 p
= to_callback(bit
, userdata
);
1946 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER
, to
, cgroup_controller_to_string(c
), p
, false, false);
1952 int cg_trim_everywhere(CGroupMask supported
, const char *path
, bool delete_root
) {
1956 r
= cg_trim(SYSTEMD_CGROUP_CONTROLLER
, path
, delete_root
);
1960 unified
= cg_unified();
1966 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1967 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1969 if (!(supported
& bit
))
1972 (void) cg_trim(cgroup_controller_to_string(c
), path
, delete_root
);
1978 int cg_mask_supported(CGroupMask
*ret
) {
1979 CGroupMask mask
= 0;
1982 /* Determines the mask of supported cgroup controllers. Only
1983 * includes controllers we can make sense of and that are
1984 * actually accessible. */
1986 unified
= cg_unified();
1990 _cleanup_free_
char *root
= NULL
, *controllers
= NULL
, *path
= NULL
;
1993 /* In the unified hierarchy we can read the supported
1994 * and accessible controllers from the top-level
1995 * cgroup attribute */
1997 r
= cg_get_root_path(&root
);
2001 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, root
, "cgroup.controllers", &path
);
2005 r
= read_one_line_file(path
, &controllers
);
2011 _cleanup_free_
char *n
= NULL
;
2014 r
= extract_first_word(&c
, &n
, NULL
, 0);
2020 v
= cgroup_controller_from_string(n
);
2024 mask
|= CGROUP_CONTROLLER_TO_MASK(v
);
2027 /* Currently, we only support the memory and pids
2028 * controller in the unified hierarchy, mask
2029 * everything else off. */
2030 mask
&= CGROUP_MASK_MEMORY
| CGROUP_MASK_PIDS
;
2035 /* In the legacy hierarchy, we check which
2036 * hierarchies are mounted. */
2038 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2041 n
= cgroup_controller_to_string(c
);
2042 if (controller_is_accessible(n
) >= 0)
2043 mask
|= CGROUP_CONTROLLER_TO_MASK(c
);
2051 int cg_kernel_controllers(Set
*controllers
) {
2052 _cleanup_fclose_
FILE *f
= NULL
;
2056 assert(controllers
);
2058 /* Determines the full list of kernel-known controllers. Might
2059 * include controllers we don't actually support, arbitrary
2060 * named hierarchies and controllers that aren't currently
2061 * accessible (because not mounted). */
2063 f
= fopen("/proc/cgroups", "re");
2065 if (errno
== ENOENT
)
2070 /* Ignore the header line */
2071 (void) fgets(buf
, sizeof(buf
), f
);
2078 if (fscanf(f
, "%ms %*i %*i %i", &controller
, &enabled
) != 2) {
2083 if (ferror(f
) && errno
!= 0)
2094 if (!cg_controller_is_valid(controller
)) {
2099 r
= set_consume(controllers
, controller
);
2107 static thread_local
int unified_cache
= -1;
2109 int cg_unified(void) {
2112 /* Checks if we support the unified hierarchy. Returns an
2113 * error when the cgroup hierarchies aren't mounted yet or we
2114 * have any other trouble determining if the unified hierarchy
2117 if (unified_cache
>= 0)
2118 return unified_cache
;
2120 if (statfs("/sys/fs/cgroup/", &fs
) < 0)
2123 if (F_TYPE_EQUAL(fs
.f_type
, CGROUP_SUPER_MAGIC
))
2124 unified_cache
= true;
2125 else if (F_TYPE_EQUAL(fs
.f_type
, TMPFS_MAGIC
))
2126 unified_cache
= false;
2130 return unified_cache
;
2133 void cg_unified_flush(void) {
2137 int cg_enable_everywhere(CGroupMask supported
, CGroupMask mask
, const char *p
) {
2138 _cleanup_free_
char *fs
= NULL
;
2147 unified
= cg_unified();
2150 if (!unified
) /* on the legacy hierarchy there's no joining of controllers defined */
2153 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, p
, "cgroup.subtree_control", &fs
);
2157 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2158 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2161 if (!(supported
& bit
))
2164 n
= cgroup_controller_to_string(c
);
2166 char s
[1 + strlen(n
) + 1];
2168 s
[0] = mask
& bit
? '+' : '-';
2171 r
= write_string_file(fs
, s
, 0);
2173 log_debug_errno(r
, "Failed to enable controller %s for %s (%s): %m", n
, p
, fs
);
2180 bool cg_is_unified_wanted(void) {
2181 static thread_local
int wanted
= -1;
2184 /* If the hierarchy is already mounted, then follow whatever
2185 * was chosen for it. */
2186 unified
= cg_unified();
2190 /* Otherwise, let's see what the kernel command line has to
2191 * say. Since checking that is expensive, let's cache the
2196 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL
);
2198 return (wanted
= true);
2200 _cleanup_free_
char *value
= NULL
;
2202 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value
);
2206 return (wanted
= false);
2208 return (wanted
= parse_boolean(value
) > 0);
2212 bool cg_is_legacy_wanted(void) {
2213 return !cg_is_unified_wanted();
2216 int cg_cpu_shares_parse(const char *s
, uint64_t *ret
) {
2221 *ret
= CGROUP_CPU_SHARES_INVALID
;
2225 r
= safe_atou64(s
, &u
);
2229 if (u
< CGROUP_CPU_SHARES_MIN
|| u
> CGROUP_CPU_SHARES_MAX
)
2236 int cg_blkio_weight_parse(const char *s
, uint64_t *ret
) {
2241 *ret
= CGROUP_BLKIO_WEIGHT_INVALID
;
2245 r
= safe_atou64(s
, &u
);
2249 if (u
< CGROUP_BLKIO_WEIGHT_MIN
|| u
> CGROUP_BLKIO_WEIGHT_MAX
)
2256 static const char *cgroup_controller_table
[_CGROUP_CONTROLLER_MAX
] = {
2257 [CGROUP_CONTROLLER_CPU
] = "cpu",
2258 [CGROUP_CONTROLLER_CPUACCT
] = "cpuacct",
2259 [CGROUP_CONTROLLER_BLKIO
] = "blkio",
2260 [CGROUP_CONTROLLER_MEMORY
] = "memory",
2261 [CGROUP_CONTROLLER_DEVICES
] = "devices",
2262 [CGROUP_CONTROLLER_PIDS
] = "pids",
2263 [CGROUP_CONTROLLER_NET_CLS
] = "net_cls",
2266 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller
, CGroupController
);