1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "extract-word.h"
34 #include "formats-util.h"
35 #include "login-util.h"
38 #include "path-util.h"
39 #include "process-util.h"
42 #include "unit-name.h"
44 #include "cgroup-util.h"
46 int cg_enumerate_processes(const char *controller
, const char *path
, FILE **_f
) {
47 _cleanup_free_
char *fs
= NULL
;
53 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
65 int cg_read_pid(FILE *f
, pid_t
*_pid
) {
68 /* Note that the cgroup.procs might contain duplicates! See
69 * cgroups.txt for details. */
75 if (fscanf(f
, "%lu", &ul
) != 1) {
80 return errno
? -errno
: -EIO
;
90 int cg_enumerate_subgroups(const char *controller
, const char *path
, DIR **_d
) {
91 _cleanup_free_
char *fs
= NULL
;
97 /* This is not recursive! */
99 r
= cg_get_path(controller
, path
, NULL
, &fs
);
111 int cg_read_subgroup(DIR *d
, char **fn
) {
117 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
120 if (de
->d_type
!= DT_DIR
)
123 if (streq(de
->d_name
, ".") ||
124 streq(de
->d_name
, ".."))
127 b
= strdup(de
->d_name
);
138 int cg_rmdir(const char *controller
, const char *path
) {
139 _cleanup_free_
char *p
= NULL
;
142 r
= cg_get_path(controller
, path
, NULL
, &p
);
147 if (r
< 0 && errno
!= ENOENT
)
153 int cg_kill(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, Set
*s
) {
154 _cleanup_set_free_ Set
*allocated_set
= NULL
;
161 /* This goes through the tasks list and kills them all. This
162 * is repeated until no further processes are added to the
163 * tasks list, to properly handle forking processes */
166 s
= allocated_set
= set_new(NULL
);
174 _cleanup_fclose_
FILE *f
= NULL
;
178 r
= cg_enumerate_processes(controller
, path
, &f
);
180 if (ret
>= 0 && r
!= -ENOENT
)
186 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
188 if (ignore_self
&& pid
== my_pid
)
191 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
194 /* If we haven't killed this process yet, kill
196 if (kill(pid
, sig
) < 0) {
197 if (ret
>= 0 && errno
!= ESRCH
)
200 if (sigcont
&& sig
!= SIGKILL
)
201 (void) kill(pid
, SIGCONT
);
209 r
= set_put(s
, PID_TO_PTR(pid
));
225 /* To avoid racing against processes which fork
226 * quicker than we can kill them we repeat this until
227 * no new pids need to be killed. */
234 int cg_kill_recursive(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, bool rem
, Set
*s
) {
235 _cleanup_set_free_ Set
*allocated_set
= NULL
;
236 _cleanup_closedir_
DIR *d
= NULL
;
244 s
= allocated_set
= set_new(NULL
);
249 ret
= cg_kill(controller
, path
, sig
, sigcont
, ignore_self
, s
);
251 r
= cg_enumerate_subgroups(controller
, path
, &d
);
253 if (ret
>= 0 && r
!= -ENOENT
)
259 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
260 _cleanup_free_
char *p
= NULL
;
262 p
= strjoin(path
, "/", fn
, NULL
);
267 r
= cg_kill_recursive(controller
, p
, sig
, sigcont
, ignore_self
, rem
, s
);
268 if (r
!= 0 && ret
>= 0)
272 if (ret
>= 0 && r
< 0)
276 r
= cg_rmdir(controller
, path
);
277 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
284 int cg_migrate(const char *cfrom
, const char *pfrom
, const char *cto
, const char *pto
, bool ignore_self
) {
286 _cleanup_set_free_ Set
*s
= NULL
;
302 _cleanup_fclose_
FILE *f
= NULL
;
306 r
= cg_enumerate_processes(cfrom
, pfrom
, &f
);
308 if (ret
>= 0 && r
!= -ENOENT
)
314 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
316 /* This might do weird stuff if we aren't a
317 * single-threaded program. However, we
318 * luckily know we are not */
319 if (ignore_self
&& pid
== my_pid
)
322 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
325 /* Ignore kernel threads. Since they can only
326 * exist in the root cgroup, we only check for
329 (isempty(pfrom
) || path_equal(pfrom
, "/")) &&
330 is_kernel_thread(pid
) > 0)
333 r
= cg_attach(cto
, pto
, pid
);
335 if (ret
>= 0 && r
!= -ESRCH
)
342 r
= set_put(s
, PID_TO_PTR(pid
));
362 int cg_migrate_recursive(
370 _cleanup_closedir_
DIR *d
= NULL
;
379 ret
= cg_migrate(cfrom
, pfrom
, cto
, pto
, ignore_self
);
381 r
= cg_enumerate_subgroups(cfrom
, pfrom
, &d
);
383 if (ret
>= 0 && r
!= -ENOENT
)
389 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
390 _cleanup_free_
char *p
= NULL
;
392 p
= strjoin(pfrom
, "/", fn
, NULL
);
397 r
= cg_migrate_recursive(cfrom
, p
, cto
, pto
, ignore_self
, rem
);
398 if (r
!= 0 && ret
>= 0)
402 if (r
< 0 && ret
>= 0)
406 r
= cg_rmdir(cfrom
, pfrom
);
407 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
414 int cg_migrate_recursive_fallback(
429 r
= cg_migrate_recursive(cfrom
, pfrom
, cto
, pto
, ignore_self
, rem
);
431 char prefix
[strlen(pto
) + 1];
433 /* This didn't work? Then let's try all prefixes of the destination */
435 PATH_FOREACH_PREFIX(prefix
, pto
) {
438 q
= cg_migrate_recursive(cfrom
, pfrom
, cto
, prefix
, ignore_self
, rem
);
447 static const char *controller_to_dirname(const char *controller
) {
452 /* Converts a controller name to the directory name below
453 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
454 * just cuts off the name= prefixed used for named
455 * hierarchies, if it is specified. */
457 e
= startswith(controller
, "name=");
464 static int join_path_legacy(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
471 dn
= controller_to_dirname(controller
);
473 if (isempty(path
) && isempty(suffix
))
474 t
= strappend("/sys/fs/cgroup/", dn
);
475 else if (isempty(path
))
476 t
= strjoin("/sys/fs/cgroup/", dn
, "/", suffix
, NULL
);
477 else if (isempty(suffix
))
478 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, NULL
);
480 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, "/", suffix
, NULL
);
488 static int join_path_unified(const char *path
, const char *suffix
, char **fs
) {
493 if (isempty(path
) && isempty(suffix
))
494 t
= strdup("/sys/fs/cgroup");
495 else if (isempty(path
))
496 t
= strappend("/sys/fs/cgroup/", suffix
);
497 else if (isempty(suffix
))
498 t
= strappend("/sys/fs/cgroup/", path
);
500 t
= strjoin("/sys/fs/cgroup/", path
, "/", suffix
, NULL
);
508 int cg_get_path(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
516 /* If no controller is specified, we return the path
517 * *below* the controllers, without any prefix. */
519 if (!path
&& !suffix
)
527 t
= strjoin(path
, "/", suffix
, NULL
);
531 *fs
= path_kill_slashes(t
);
535 if (!cg_controller_is_valid(controller
))
538 unified
= cg_unified();
543 r
= join_path_unified(path
, suffix
, fs
);
545 r
= join_path_legacy(controller
, path
, suffix
, fs
);
549 path_kill_slashes(*fs
);
553 static int controller_is_accessible(const char *controller
) {
558 /* Checks whether a specific controller is accessible,
559 * i.e. its hierarchy mounted. In the unified hierarchy all
560 * controllers are considered accessible, except for the named
563 if (!cg_controller_is_valid(controller
))
566 unified
= cg_unified();
570 /* We don't support named hierarchies if we are using
571 * the unified hierarchy. */
573 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
))
576 if (startswith(controller
, "name="))
582 dn
= controller_to_dirname(controller
);
583 cc
= strjoina("/sys/fs/cgroup/", dn
);
585 if (laccess(cc
, F_OK
) < 0)
592 int cg_get_path_and_check(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
598 /* Check if the specified controller is actually accessible */
599 r
= controller_is_accessible(controller
);
603 return cg_get_path(controller
, path
, suffix
, fs
);
606 static int trim_cb(const char *path
, const struct stat
*sb
, int typeflag
, struct FTW
*ftwbuf
) {
611 if (typeflag
!= FTW_DP
)
614 if (ftwbuf
->level
< 1)
621 int cg_trim(const char *controller
, const char *path
, bool delete_root
) {
622 _cleanup_free_
char *fs
= NULL
;
627 r
= cg_get_path(controller
, path
, NULL
, &fs
);
632 if (nftw(fs
, trim_cb
, 64, FTW_DEPTH
|FTW_MOUNT
|FTW_PHYS
) != 0) {
642 if (rmdir(fs
) < 0 && errno
!= ENOENT
)
649 int cg_create(const char *controller
, const char *path
) {
650 _cleanup_free_
char *fs
= NULL
;
653 r
= cg_get_path_and_check(controller
, path
, NULL
, &fs
);
657 r
= mkdir_parents(fs
, 0755);
661 if (mkdir(fs
, 0755) < 0) {
672 int cg_create_and_attach(const char *controller
, const char *path
, pid_t pid
) {
677 r
= cg_create(controller
, path
);
681 q
= cg_attach(controller
, path
, pid
);
685 /* This does not remove the cgroup on failure */
689 int cg_attach(const char *controller
, const char *path
, pid_t pid
) {
690 _cleanup_free_
char *fs
= NULL
;
691 char c
[DECIMAL_STR_MAX(pid_t
) + 2];
697 r
= cg_get_path_and_check(controller
, path
, "cgroup.procs", &fs
);
704 snprintf(c
, sizeof(c
), PID_FMT
"\n", pid
);
706 return write_string_file(fs
, c
, 0);
709 int cg_attach_fallback(const char *controller
, const char *path
, pid_t pid
) {
716 r
= cg_attach(controller
, path
, pid
);
718 char prefix
[strlen(path
) + 1];
720 /* This didn't work? Then let's try all prefixes of
723 PATH_FOREACH_PREFIX(prefix
, path
) {
726 q
= cg_attach(controller
, prefix
, pid
);
735 int cg_set_group_access(
736 const char *controller
,
742 _cleanup_free_
char *fs
= NULL
;
745 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
748 if (mode
!= MODE_INVALID
)
751 r
= cg_get_path(controller
, path
, NULL
, &fs
);
755 return chmod_and_chown(fs
, mode
, uid
, gid
);
758 int cg_set_task_access(
759 const char *controller
,
765 _cleanup_free_
char *fs
= NULL
, *procs
= NULL
;
770 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
773 if (mode
!= MODE_INVALID
)
776 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
780 r
= chmod_and_chown(fs
, mode
, uid
, gid
);
784 unified
= cg_unified();
790 /* Compatibility, Always keep values for "tasks" in sync with
792 if (cg_get_path(controller
, path
, "tasks", &procs
) >= 0)
793 (void) chmod_and_chown(procs
, mode
, uid
, gid
);
798 int cg_pid_get_path(const char *controller
, pid_t pid
, char **path
) {
799 _cleanup_fclose_
FILE *f
= NULL
;
808 unified
= cg_unified();
813 if (!cg_controller_is_valid(controller
))
816 controller
= SYSTEMD_CGROUP_CONTROLLER
;
818 cs
= strlen(controller
);
821 fs
= procfs_file_alloca(pid
, "cgroup");
824 return errno
== ENOENT
? -ESRCH
: -errno
;
826 FOREACH_LINE(line
, f
, return -errno
) {
832 e
= startswith(line
, "0:");
842 const char *word
, *state
;
845 l
= strchr(line
, ':');
855 FOREACH_WORD_SEPARATOR(word
, k
, l
, ",", state
) {
856 if (k
== cs
&& memcmp(word
, controller
, cs
) == 0) {
877 int cg_install_release_agent(const char *controller
, const char *agent
) {
878 _cleanup_free_
char *fs
= NULL
, *contents
= NULL
;
884 unified
= cg_unified();
887 if (unified
) /* doesn't apply to unified hierarchy */
890 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
894 r
= read_one_line_file(fs
, &contents
);
898 sc
= strstrip(contents
);
900 r
= write_string_file(fs
, agent
, 0);
903 } else if (!path_equal(sc
, agent
))
907 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
911 contents
= mfree(contents
);
912 r
= read_one_line_file(fs
, &contents
);
916 sc
= strstrip(contents
);
917 if (streq(sc
, "0")) {
918 r
= write_string_file(fs
, "1", 0);
931 int cg_uninstall_release_agent(const char *controller
) {
932 _cleanup_free_
char *fs
= NULL
;
935 unified
= cg_unified();
938 if (unified
) /* Doesn't apply to unified hierarchy */
941 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
945 r
= write_string_file(fs
, "0", 0);
951 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
955 r
= write_string_file(fs
, "", 0);
962 int cg_is_empty(const char *controller
, const char *path
) {
963 _cleanup_fclose_
FILE *f
= NULL
;
969 r
= cg_enumerate_processes(controller
, path
, &f
);
975 r
= cg_read_pid(f
, &pid
);
982 int cg_is_empty_recursive(const char *controller
, const char *path
) {
987 /* The root cgroup is always populated */
988 if (controller
&& (isempty(path
) || path_equal(path
, "/")))
991 unified
= cg_unified();
996 _cleanup_free_
char *populated
= NULL
, *t
= NULL
;
998 /* On the unified hierarchy we can check empty state
999 * via the "cgroup.populated" attribute. */
1001 r
= cg_get_path(controller
, path
, "cgroup.populated", &populated
);
1005 r
= read_one_line_file(populated
, &t
);
1011 return streq(t
, "0");
1013 _cleanup_closedir_
DIR *d
= NULL
;
1016 r
= cg_is_empty(controller
, path
);
1020 r
= cg_enumerate_subgroups(controller
, path
, &d
);
1026 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
1027 _cleanup_free_
char *p
= NULL
;
1029 p
= strjoin(path
, "/", fn
, NULL
);
1034 r
= cg_is_empty_recursive(controller
, p
);
1045 int cg_split_spec(const char *spec
, char **controller
, char **path
) {
1046 char *t
= NULL
, *u
= NULL
;
1052 if (!path_is_safe(spec
))
1060 *path
= path_kill_slashes(t
);
1069 e
= strchr(spec
, ':');
1071 if (!cg_controller_is_valid(spec
))
1088 t
= strndup(spec
, e
-spec
);
1091 if (!cg_controller_is_valid(t
)) {
1105 if (!path_is_safe(u
) ||
1106 !path_is_absolute(u
)) {
1112 path_kill_slashes(u
);
1128 int cg_mangle_path(const char *path
, char **result
) {
1129 _cleanup_free_
char *c
= NULL
, *p
= NULL
;
1136 /* First, check if it already is a filesystem path */
1137 if (path_startswith(path
, "/sys/fs/cgroup")) {
1143 *result
= path_kill_slashes(t
);
1147 /* Otherwise, treat it as cg spec */
1148 r
= cg_split_spec(path
, &c
, &p
);
1152 return cg_get_path(c
?: SYSTEMD_CGROUP_CONTROLLER
, p
?: "/", NULL
, result
);
1155 int cg_get_root_path(char **path
) {
1161 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 1, &p
);
1165 e
= endswith(p
, "/" SPECIAL_INIT_SCOPE
);
1167 e
= endswith(p
, "/" SPECIAL_SYSTEM_SLICE
); /* legacy */
1169 e
= endswith(p
, "/system"); /* even more legacy */
1177 int cg_shift_path(const char *cgroup
, const char *root
, const char **shifted
) {
1178 _cleanup_free_
char *rt
= NULL
;
1186 /* If the root was specified let's use that, otherwise
1187 * let's determine it from PID 1 */
1189 r
= cg_get_root_path(&rt
);
1196 p
= path_startswith(cgroup
, root
);
1197 if (p
&& p
> cgroup
)
1205 int cg_pid_get_path_shifted(pid_t pid
, const char *root
, char **cgroup
) {
1206 _cleanup_free_
char *raw
= NULL
;
1213 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &raw
);
1217 r
= cg_shift_path(raw
, root
, &c
);
1237 int cg_path_decode_unit(const char *cgroup
, char **unit
){
1244 n
= strcspn(cgroup
, "/");
1248 c
= strndupa(cgroup
, n
);
1251 if (!unit_name_is_valid(c
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
1262 static bool valid_slice_name(const char *p
, size_t n
) {
1267 if (n
< strlen("x.slice"))
1270 if (memcmp(p
+ n
- 6, ".slice", 6) == 0) {
1276 c
= cg_unescape(buf
);
1278 return unit_name_is_valid(c
, UNIT_NAME_PLAIN
);
1284 static const char *skip_slices(const char *p
) {
1287 /* Skips over all slice assignments */
1292 p
+= strspn(p
, "/");
1294 n
= strcspn(p
, "/");
1295 if (!valid_slice_name(p
, n
))
1302 int cg_path_get_unit(const char *path
, char **ret
) {
1310 e
= skip_slices(path
);
1312 r
= cg_path_decode_unit(e
, &unit
);
1316 /* We skipped over the slices, don't accept any now */
1317 if (endswith(unit
, ".slice")) {
1326 int cg_pid_get_unit(pid_t pid
, char **unit
) {
1327 _cleanup_free_
char *cgroup
= NULL
;
1332 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1336 return cg_path_get_unit(cgroup
, unit
);
1340 * Skip session-*.scope, but require it to be there.
1342 static const char *skip_session(const char *p
) {
1348 p
+= strspn(p
, "/");
1350 n
= strcspn(p
, "/");
1351 if (n
< strlen("session-x.scope"))
1354 if (memcmp(p
, "session-", 8) == 0 && memcmp(p
+ n
- 6, ".scope", 6) == 0) {
1355 char buf
[n
- 8 - 6 + 1];
1357 memcpy(buf
, p
+ 8, n
- 8 - 6);
1360 /* Note that session scopes never need unescaping,
1361 * since they cannot conflict with the kernel's own
1362 * names, hence we don't need to call cg_unescape()
1365 if (!session_id_valid(buf
))
1369 p
+= strspn(p
, "/");
1377 * Skip user@*.service, but require it to be there.
1379 static const char *skip_user_manager(const char *p
) {
1385 p
+= strspn(p
, "/");
1387 n
= strcspn(p
, "/");
1388 if (n
< strlen("user@x.service"))
1391 if (memcmp(p
, "user@", 5) == 0 && memcmp(p
+ n
- 8, ".service", 8) == 0) {
1392 char buf
[n
- 5 - 8 + 1];
1394 memcpy(buf
, p
+ 5, n
- 5 - 8);
1397 /* Note that user manager services never need unescaping,
1398 * since they cannot conflict with the kernel's own
1399 * names, hence we don't need to call cg_unescape()
1402 if (parse_uid(buf
, NULL
) < 0)
1406 p
+= strspn(p
, "/");
1414 static const char *skip_user_prefix(const char *path
) {
1419 /* Skip slices, if there are any */
1420 e
= skip_slices(path
);
1422 /* Skip the user manager, if it's in the path now... */
1423 t
= skip_user_manager(e
);
1427 /* Alternatively skip the user session if it is in the path... */
1428 return skip_session(e
);
1431 int cg_path_get_user_unit(const char *path
, char **ret
) {
1437 t
= skip_user_prefix(path
);
1441 /* And from here on it looks pretty much the same as for a
1442 * system unit, hence let's use the same parser from here
1444 return cg_path_get_unit(t
, ret
);
1447 int cg_pid_get_user_unit(pid_t pid
, char **unit
) {
1448 _cleanup_free_
char *cgroup
= NULL
;
1453 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1457 return cg_path_get_user_unit(cgroup
, unit
);
1460 int cg_path_get_machine_name(const char *path
, char **machine
) {
1461 _cleanup_free_
char *u
= NULL
;
1465 r
= cg_path_get_unit(path
, &u
);
1469 sl
= strjoina("/run/systemd/machines/unit:", u
);
1470 return readlink_malloc(sl
, machine
);
1473 int cg_pid_get_machine_name(pid_t pid
, char **machine
) {
1474 _cleanup_free_
char *cgroup
= NULL
;
1479 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1483 return cg_path_get_machine_name(cgroup
, machine
);
1486 int cg_path_get_session(const char *path
, char **session
) {
1487 _cleanup_free_
char *unit
= NULL
;
1493 r
= cg_path_get_unit(path
, &unit
);
1497 start
= startswith(unit
, "session-");
1500 end
= endswith(start
, ".scope");
1505 if (!session_id_valid(start
))
1521 int cg_pid_get_session(pid_t pid
, char **session
) {
1522 _cleanup_free_
char *cgroup
= NULL
;
1525 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1529 return cg_path_get_session(cgroup
, session
);
1532 int cg_path_get_owner_uid(const char *path
, uid_t
*uid
) {
1533 _cleanup_free_
char *slice
= NULL
;
1539 r
= cg_path_get_slice(path
, &slice
);
1543 start
= startswith(slice
, "user-");
1546 end
= endswith(start
, ".slice");
1551 if (parse_uid(start
, uid
) < 0)
1557 int cg_pid_get_owner_uid(pid_t pid
, uid_t
*uid
) {
1558 _cleanup_free_
char *cgroup
= NULL
;
1561 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1565 return cg_path_get_owner_uid(cgroup
, uid
);
1568 int cg_path_get_slice(const char *p
, char **slice
) {
1569 const char *e
= NULL
;
1574 /* Finds the right-most slice unit from the beginning, but
1575 * stops before we come to the first non-slice unit. */
1580 p
+= strspn(p
, "/");
1582 n
= strcspn(p
, "/");
1583 if (!valid_slice_name(p
, n
)) {
1588 s
= strdup("-.slice");
1596 return cg_path_decode_unit(e
, slice
);
1604 int cg_pid_get_slice(pid_t pid
, char **slice
) {
1605 _cleanup_free_
char *cgroup
= NULL
;
1610 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1614 return cg_path_get_slice(cgroup
, slice
);
1617 int cg_path_get_user_slice(const char *p
, char **slice
) {
1622 t
= skip_user_prefix(p
);
1626 /* And now it looks pretty much the same as for a system
1627 * slice, so let's just use the same parser from here on. */
1628 return cg_path_get_slice(t
, slice
);
1631 int cg_pid_get_user_slice(pid_t pid
, char **slice
) {
1632 _cleanup_free_
char *cgroup
= NULL
;
1637 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1641 return cg_path_get_user_slice(cgroup
, slice
);
1644 char *cg_escape(const char *p
) {
1645 bool need_prefix
= false;
1647 /* This implements very minimal escaping for names to be used
1648 * as file names in the cgroup tree: any name which might
1649 * conflict with a kernel name or is prefixed with '_' is
1650 * prefixed with a '_'. That way, when reading cgroup names it
1651 * is sufficient to remove a single prefixing underscore if
1654 /* The return value of this function (unlike cg_unescape())
1660 streq(p
, "notify_on_release") ||
1661 streq(p
, "release_agent") ||
1662 streq(p
, "tasks") ||
1663 startswith(p
, "cgroup."))
1668 dot
= strrchr(p
, '.');
1673 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1676 n
= cgroup_controller_to_string(c
);
1681 if (memcmp(p
, n
, l
) != 0)
1691 return strappend("_", p
);
1696 char *cg_unescape(const char *p
) {
1699 /* The return value of this function (unlike cg_escape())
1700 * doesn't need free()! */
1708 #define CONTROLLER_VALID \
1712 bool cg_controller_is_valid(const char *p
) {
1718 s
= startswith(p
, "name=");
1722 if (*p
== 0 || *p
== '_')
1725 for (t
= p
; *t
; t
++)
1726 if (!strchr(CONTROLLER_VALID
, *t
))
1729 if (t
- p
> FILENAME_MAX
)
1735 int cg_slice_to_path(const char *unit
, char **ret
) {
1736 _cleanup_free_
char *p
= NULL
, *s
= NULL
, *e
= NULL
;
1743 if (streq(unit
, "-.slice")) {
1753 if (!unit_name_is_valid(unit
, UNIT_NAME_PLAIN
))
1756 if (!endswith(unit
, ".slice"))
1759 r
= unit_name_to_prefix(unit
, &p
);
1763 dash
= strchr(p
, '-');
1765 /* Don't allow initial dashes */
1770 _cleanup_free_
char *escaped
= NULL
;
1771 char n
[dash
- p
+ sizeof(".slice")];
1773 /* Don't allow trailing or double dashes */
1774 if (dash
[1] == 0 || dash
[1] == '-')
1777 strcpy(stpncpy(n
, p
, dash
- p
), ".slice");
1778 if (!unit_name_is_valid(n
, UNIT_NAME_PLAIN
))
1781 escaped
= cg_escape(n
);
1785 if (!strextend(&s
, escaped
, "/", NULL
))
1788 dash
= strchr(dash
+1, '-');
1791 e
= cg_escape(unit
);
1795 if (!strextend(&s
, e
, NULL
))
1804 int cg_set_attribute(const char *controller
, const char *path
, const char *attribute
, const char *value
) {
1805 _cleanup_free_
char *p
= NULL
;
1808 r
= cg_get_path(controller
, path
, attribute
, &p
);
1812 return write_string_file(p
, value
, 0);
1815 int cg_get_attribute(const char *controller
, const char *path
, const char *attribute
, char **ret
) {
1816 _cleanup_free_
char *p
= NULL
;
1819 r
= cg_get_path(controller
, path
, attribute
, &p
);
1823 return read_one_line_file(p
, ret
);
1826 int cg_create_everywhere(CGroupMask supported
, CGroupMask mask
, const char *path
) {
1830 /* This one will create a cgroup in our private tree, but also
1831 * duplicate it in the trees specified in mask, and remove it
1834 /* First create the cgroup in our own hierarchy. */
1835 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER
, path
);
1839 /* If we are in the unified hierarchy, we are done now */
1840 unified
= cg_unified();
1846 /* Otherwise, do the same in the other hierarchies */
1847 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1848 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1851 n
= cgroup_controller_to_string(c
);
1854 (void) cg_create(n
, path
);
1855 else if (supported
& bit
)
1856 (void) cg_trim(n
, path
, true);
1862 int cg_attach_everywhere(CGroupMask supported
, const char *path
, pid_t pid
, cg_migrate_callback_t path_callback
, void *userdata
) {
1866 r
= cg_attach(SYSTEMD_CGROUP_CONTROLLER
, path
, pid
);
1870 unified
= cg_unified();
1876 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1877 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1878 const char *p
= NULL
;
1880 if (!(supported
& bit
))
1884 p
= path_callback(bit
, userdata
);
1889 (void) cg_attach_fallback(cgroup_controller_to_string(c
), p
, pid
);
1895 int cg_attach_many_everywhere(CGroupMask supported
, const char *path
, Set
* pids
, cg_migrate_callback_t path_callback
, void *userdata
) {
1900 SET_FOREACH(pidp
, pids
, i
) {
1901 pid_t pid
= PTR_TO_PID(pidp
);
1904 q
= cg_attach_everywhere(supported
, path
, pid
, path_callback
, userdata
);
1905 if (q
< 0 && r
>= 0)
1912 int cg_migrate_everywhere(CGroupMask supported
, const char *from
, const char *to
, cg_migrate_callback_t to_callback
, void *userdata
) {
1916 if (!path_equal(from
, to
)) {
1917 r
= cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER
, from
, SYSTEMD_CGROUP_CONTROLLER
, to
, false, true);
1922 unified
= cg_unified();
1928 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1929 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1930 const char *p
= NULL
;
1932 if (!(supported
& bit
))
1936 p
= to_callback(bit
, userdata
);
1941 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER
, to
, cgroup_controller_to_string(c
), p
, false, false);
1947 int cg_trim_everywhere(CGroupMask supported
, const char *path
, bool delete_root
) {
1951 r
= cg_trim(SYSTEMD_CGROUP_CONTROLLER
, path
, delete_root
);
1955 unified
= cg_unified();
1961 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1962 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1964 if (!(supported
& bit
))
1967 (void) cg_trim(cgroup_controller_to_string(c
), path
, delete_root
);
1973 int cg_mask_supported(CGroupMask
*ret
) {
1974 CGroupMask mask
= 0;
1977 /* Determines the mask of supported cgroup controllers. Only
1978 * includes controllers we can make sense of and that are
1979 * actually accessible. */
1981 unified
= cg_unified();
1985 _cleanup_free_
char *root
= NULL
, *controllers
= NULL
, *path
= NULL
;
1988 /* In the unified hierarchy we can read the supported
1989 * and accessible controllers from a the top-level
1990 * cgroup attribute */
1992 r
= cg_get_root_path(&root
);
1996 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, root
, "cgroup.controllers", &path
);
2000 r
= read_one_line_file(path
, &controllers
);
2006 _cleanup_free_
char *n
= NULL
;
2009 r
= extract_first_word(&c
, &n
, NULL
, 0);
2015 v
= cgroup_controller_from_string(n
);
2019 mask
|= CGROUP_CONTROLLER_TO_MASK(v
);
2022 /* Currently, we only support the memory and pids
2023 * controller in the unified hierarchy, mask
2024 * everything else off. */
2025 mask
&= CGROUP_MASK_MEMORY
| CGROUP_MASK_PIDS
;
2030 /* In the legacy hierarchy, we check whether which
2031 * hierarchies are mounted. */
2033 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2036 n
= cgroup_controller_to_string(c
);
2037 if (controller_is_accessible(n
) >= 0)
2038 mask
|= CGROUP_CONTROLLER_TO_MASK(c
);
2046 int cg_kernel_controllers(Set
*controllers
) {
2047 _cleanup_fclose_
FILE *f
= NULL
;
2051 assert(controllers
);
2053 /* Determines the full list of kernel-known controllers. Might
2054 * include controllers we don't actually support, arbitrary
2055 * named hierarchies and controllers that aren't currently
2056 * accessible (because not mounted). */
2058 f
= fopen("/proc/cgroups", "re");
2060 if (errno
== ENOENT
)
2065 /* Ignore the header line */
2066 (void) fgets(buf
, sizeof(buf
), f
);
2073 if (fscanf(f
, "%ms %*i %*i %i", &controller
, &enabled
) != 2) {
2078 if (ferror(f
) && errno
!= 0)
2089 if (!cg_controller_is_valid(controller
)) {
2094 r
= set_consume(controllers
, controller
);
2102 static thread_local
int unified_cache
= -1;
2104 int cg_unified(void) {
2107 /* Checks if we support the unified hierarchy. Returns an
2108 * error when the cgroup hierarchies aren't mounted yet or we
2109 * have any other trouble determining if the unified hierarchy
2112 if (unified_cache
>= 0)
2113 return unified_cache
;
2115 if (statfs("/sys/fs/cgroup/", &fs
) < 0)
2118 if (F_TYPE_EQUAL(fs
.f_type
, CGROUP_SUPER_MAGIC
))
2119 unified_cache
= true;
2120 else if (F_TYPE_EQUAL(fs
.f_type
, TMPFS_MAGIC
))
2121 unified_cache
= false;
2125 return unified_cache
;
2128 void cg_unified_flush(void) {
2132 int cg_enable_everywhere(CGroupMask supported
, CGroupMask mask
, const char *p
) {
2133 _cleanup_free_
char *fs
= NULL
;
2142 unified
= cg_unified();
2145 if (!unified
) /* on the legacy hiearchy there's no joining of controllers defined */
2148 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, p
, "cgroup.subtree_control", &fs
);
2152 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2153 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2156 if (!(supported
& bit
))
2159 n
= cgroup_controller_to_string(c
);
2161 char s
[1 + strlen(n
) + 1];
2163 s
[0] = mask
& bit
? '+' : '-';
2166 r
= write_string_file(fs
, s
, 0);
2168 log_debug_errno(r
, "Failed to enable controller %s for %s (%s): %m", n
, p
, fs
);
2175 bool cg_is_unified_wanted(void) {
2176 static thread_local
int wanted
= -1;
2179 /* If the hierarchy is already mounted, then follow whatever
2180 * was chosen for it. */
2181 unified
= cg_unified();
2185 /* Otherwise, let's see what the kernel command line has to
2186 * say. Since checking that is expensive, let's cache the
2191 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL
);
2193 return (wanted
= true);
2195 _cleanup_free_
char *value
= NULL
;
2197 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value
);
2201 return (wanted
= false);
2203 return (wanted
= parse_boolean(value
) > 0);
2207 bool cg_is_legacy_wanted(void) {
2208 return !cg_is_unified_wanted();
2211 int cg_cpu_shares_parse(const char *s
, uint64_t *ret
) {
2216 *ret
= CGROUP_CPU_SHARES_INVALID
;
2220 r
= safe_atou64(s
, &u
);
2224 if (u
< CGROUP_CPU_SHARES_MIN
|| u
> CGROUP_CPU_SHARES_MAX
)
2231 int cg_blkio_weight_parse(const char *s
, uint64_t *ret
) {
2236 *ret
= CGROUP_BLKIO_WEIGHT_INVALID
;
2240 r
= safe_atou64(s
, &u
);
2244 if (u
< CGROUP_BLKIO_WEIGHT_MIN
|| u
> CGROUP_BLKIO_WEIGHT_MAX
)
2251 static const char *cgroup_controller_table
[_CGROUP_CONTROLLER_MAX
] = {
2252 [CGROUP_CONTROLLER_CPU
] = "cpu",
2253 [CGROUP_CONTROLLER_CPUACCT
] = "cpuacct",
2254 [CGROUP_CONTROLLER_BLKIO
] = "blkio",
2255 [CGROUP_CONTROLLER_MEMORY
] = "memory",
2256 [CGROUP_CONTROLLER_DEVICES
] = "devices",
2257 [CGROUP_CONTROLLER_PIDS
] = "pids",
2258 [CGROUP_CONTROLLER_NET_CLS
] = "net_cls",
2261 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller
, CGroupController
);