1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
27 #include <stdio_ext.h>
31 #include <sys/statfs.h>
32 #include <sys/types.h>
33 #include <sys/xattr.h>
36 #include "alloc-util.h"
37 #include "cgroup-util.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
43 #include "format-util.h"
46 #include "login-util.h"
50 #include "parse-util.h"
51 #include "path-util.h"
52 #include "proc-cmdline.h"
53 #include "process-util.h"
56 #include "stat-util.h"
57 #include "stdio-util.h"
58 #include "string-table.h"
59 #include "string-util.h"
61 #include "unit-name.h"
62 #include "user-util.h"
64 int cg_enumerate_processes(const char *controller
, const char *path
, FILE **_f
) {
65 _cleanup_free_
char *fs
= NULL
;
71 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
83 int cg_read_pid(FILE *f
, pid_t
*_pid
) {
86 /* Note that the cgroup.procs might contain duplicates! See
87 * cgroups.txt for details. */
93 if (fscanf(f
, "%lu", &ul
) != 1) {
98 return errno
> 0 ? -errno
: -EIO
;
109 const char *controller
,
114 _cleanup_free_
char *events
= NULL
, *content
= NULL
;
118 r
= cg_get_path(controller
, path
, "cgroup.events", &events
);
122 r
= read_full_file(events
, &content
, NULL
);
127 while ((line
= strsep(&p
, "\n"))) {
130 key
= strsep(&line
, " ");
134 if (strcmp(key
, event
))
144 bool cg_ns_supported(void) {
145 static thread_local
int enabled
= -1;
150 if (access("/proc/self/ns/cgroup", F_OK
) == 0)
158 int cg_enumerate_subgroups(const char *controller
, const char *path
, DIR **_d
) {
159 _cleanup_free_
char *fs
= NULL
;
165 /* This is not recursive! */
167 r
= cg_get_path(controller
, path
, NULL
, &fs
);
179 int cg_read_subgroup(DIR *d
, char **fn
) {
185 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
188 if (de
->d_type
!= DT_DIR
)
191 if (dot_or_dot_dot(de
->d_name
))
194 b
= strdup(de
->d_name
);
205 int cg_rmdir(const char *controller
, const char *path
) {
206 _cleanup_free_
char *p
= NULL
;
209 r
= cg_get_path(controller
, path
, NULL
, &p
);
214 if (r
< 0 && errno
!= ENOENT
)
217 r
= cg_hybrid_unified();
223 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
)) {
224 r
= cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY
, path
);
226 log_warning_errno(r
, "Failed to remove compat systemd cgroup %s: %m", path
);
233 const char *controller
,
238 cg_kill_log_func_t log_kill
,
241 _cleanup_set_free_ Set
*allocated_set
= NULL
;
248 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
249 * SIGCONT on SIGKILL. */
250 if (IN_SET(sig
, SIGCONT
, SIGKILL
))
251 flags
&= ~CGROUP_SIGCONT
;
253 /* This goes through the tasks list and kills them all. This
254 * is repeated until no further processes are added to the
255 * tasks list, to properly handle forking processes */
258 s
= allocated_set
= set_new(NULL
);
263 my_pid
= getpid_cached();
266 _cleanup_fclose_
FILE *f
= NULL
;
270 r
= cg_enumerate_processes(controller
, path
, &f
);
272 if (ret
>= 0 && r
!= -ENOENT
)
278 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
280 if ((flags
& CGROUP_IGNORE_SELF
) && pid
== my_pid
)
283 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
287 log_kill(pid
, sig
, userdata
);
289 /* If we haven't killed this process yet, kill
291 if (kill(pid
, sig
) < 0) {
292 if (ret
>= 0 && errno
!= ESRCH
)
295 if (flags
& CGROUP_SIGCONT
)
296 (void) kill(pid
, SIGCONT
);
304 r
= set_put(s
, PID_TO_PTR(pid
));
320 /* To avoid racing against processes which fork
321 * quicker than we can kill them we repeat this until
322 * no new pids need to be killed. */
329 int cg_kill_recursive(
330 const char *controller
,
335 cg_kill_log_func_t log_kill
,
338 _cleanup_set_free_ Set
*allocated_set
= NULL
;
339 _cleanup_closedir_
DIR *d
= NULL
;
347 s
= allocated_set
= set_new(NULL
);
352 ret
= cg_kill(controller
, path
, sig
, flags
, s
, log_kill
, userdata
);
354 r
= cg_enumerate_subgroups(controller
, path
, &d
);
356 if (ret
>= 0 && r
!= -ENOENT
)
362 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
363 _cleanup_free_
char *p
= NULL
;
365 p
= strjoin(path
, "/", fn
);
370 r
= cg_kill_recursive(controller
, p
, sig
, flags
, s
, log_kill
, userdata
);
371 if (r
!= 0 && ret
>= 0)
374 if (ret
>= 0 && r
< 0)
377 if (flags
& CGROUP_REMOVE
) {
378 r
= cg_rmdir(controller
, path
);
379 if (r
< 0 && ret
>= 0 && !IN_SET(r
, -ENOENT
, -EBUSY
))
394 _cleanup_set_free_ Set
*s
= NULL
;
407 my_pid
= getpid_cached();
410 _cleanup_fclose_
FILE *f
= NULL
;
414 r
= cg_enumerate_processes(cfrom
, pfrom
, &f
);
416 if (ret
>= 0 && r
!= -ENOENT
)
422 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
424 /* This might do weird stuff if we aren't a
425 * single-threaded program. However, we
426 * luckily know we are single-threaded */
427 if ((flags
& CGROUP_IGNORE_SELF
) && pid
== my_pid
)
430 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
433 /* Ignore kernel threads. Since they can only
434 * exist in the root cgroup, we only check for
437 (isempty(pfrom
) || path_equal(pfrom
, "/")) &&
438 is_kernel_thread(pid
) > 0)
441 r
= cg_attach(cto
, pto
, pid
);
443 if (ret
>= 0 && r
!= -ESRCH
)
450 r
= set_put(s
, PID_TO_PTR(pid
));
470 int cg_migrate_recursive(
477 _cleanup_closedir_
DIR *d
= NULL
;
486 ret
= cg_migrate(cfrom
, pfrom
, cto
, pto
, flags
);
488 r
= cg_enumerate_subgroups(cfrom
, pfrom
, &d
);
490 if (ret
>= 0 && r
!= -ENOENT
)
496 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
497 _cleanup_free_
char *p
= NULL
;
499 p
= strjoin(pfrom
, "/", fn
);
504 r
= cg_migrate_recursive(cfrom
, p
, cto
, pto
, flags
);
505 if (r
!= 0 && ret
>= 0)
509 if (r
< 0 && ret
>= 0)
512 if (flags
& CGROUP_REMOVE
) {
513 r
= cg_rmdir(cfrom
, pfrom
);
514 if (r
< 0 && ret
>= 0 && !IN_SET(r
, -ENOENT
, -EBUSY
))
521 int cg_migrate_recursive_fallback(
535 r
= cg_migrate_recursive(cfrom
, pfrom
, cto
, pto
, flags
);
537 char prefix
[strlen(pto
) + 1];
539 /* This didn't work? Then let's try all prefixes of the destination */
541 PATH_FOREACH_PREFIX(prefix
, pto
) {
544 q
= cg_migrate_recursive(cfrom
, pfrom
, cto
, prefix
, flags
);
553 static const char *controller_to_dirname(const char *controller
) {
558 /* Converts a controller name to the directory name below
559 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
560 * just cuts off the name= prefix used for named
561 * hierarchies, if it is specified. */
563 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
)) {
564 if (cg_hybrid_unified() > 0)
565 controller
= SYSTEMD_CGROUP_CONTROLLER_HYBRID
;
567 controller
= SYSTEMD_CGROUP_CONTROLLER_LEGACY
;
570 e
= startswith(controller
, "name=");
577 static int join_path_legacy(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
584 dn
= controller_to_dirname(controller
);
586 if (isempty(path
) && isempty(suffix
))
587 t
= strappend("/sys/fs/cgroup/", dn
);
588 else if (isempty(path
))
589 t
= strjoin("/sys/fs/cgroup/", dn
, "/", suffix
);
590 else if (isempty(suffix
))
591 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
);
593 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, "/", suffix
);
601 static int join_path_unified(const char *path
, const char *suffix
, char **fs
) {
606 if (isempty(path
) && isempty(suffix
))
607 t
= strdup("/sys/fs/cgroup");
608 else if (isempty(path
))
609 t
= strappend("/sys/fs/cgroup/", suffix
);
610 else if (isempty(suffix
))
611 t
= strappend("/sys/fs/cgroup/", path
);
613 t
= strjoin("/sys/fs/cgroup/", path
, "/", suffix
);
621 int cg_get_path(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
629 /* If no controller is specified, we return the path
630 * *below* the controllers, without any prefix. */
632 if (!path
&& !suffix
)
640 t
= strjoin(path
, "/", suffix
);
644 *fs
= path_kill_slashes(t
);
648 if (!cg_controller_is_valid(controller
))
651 r
= cg_all_unified();
655 r
= join_path_unified(path
, suffix
, fs
);
657 r
= join_path_legacy(controller
, path
, suffix
, fs
);
661 path_kill_slashes(*fs
);
665 static int controller_is_accessible(const char *controller
) {
670 /* Checks whether a specific controller is accessible,
671 * i.e. its hierarchy mounted. In the unified hierarchy all
672 * controllers are considered accessible, except for the named
675 if (!cg_controller_is_valid(controller
))
678 r
= cg_all_unified();
682 /* We don't support named hierarchies if we are using
683 * the unified hierarchy. */
685 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
))
688 if (startswith(controller
, "name="))
694 dn
= controller_to_dirname(controller
);
695 cc
= strjoina("/sys/fs/cgroup/", dn
);
697 if (laccess(cc
, F_OK
) < 0)
704 int cg_get_path_and_check(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
710 /* Check if the specified controller is actually accessible */
711 r
= controller_is_accessible(controller
);
715 return cg_get_path(controller
, path
, suffix
, fs
);
718 static int trim_cb(const char *path
, const struct stat
*sb
, int typeflag
, struct FTW
*ftwbuf
) {
723 if (typeflag
!= FTW_DP
)
726 if (ftwbuf
->level
< 1)
733 int cg_trim(const char *controller
, const char *path
, bool delete_root
) {
734 _cleanup_free_
char *fs
= NULL
;
739 r
= cg_get_path(controller
, path
, NULL
, &fs
);
744 if (nftw(fs
, trim_cb
, 64, FTW_DEPTH
|FTW_MOUNT
|FTW_PHYS
) != 0) {
754 if (rmdir(fs
) < 0 && errno
!= ENOENT
)
758 q
= cg_hybrid_unified();
761 if (q
> 0 && streq(controller
, SYSTEMD_CGROUP_CONTROLLER
)) {
762 q
= cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY
, path
, delete_root
);
764 log_warning_errno(q
, "Failed to trim compat systemd cgroup %s: %m", path
);
770 int cg_create(const char *controller
, const char *path
) {
771 _cleanup_free_
char *fs
= NULL
;
774 r
= cg_get_path_and_check(controller
, path
, NULL
, &fs
);
778 r
= mkdir_parents(fs
, 0755);
782 r
= mkdir_errno_wrapper(fs
, 0755);
788 r
= cg_hybrid_unified();
792 if (r
> 0 && streq(controller
, SYSTEMD_CGROUP_CONTROLLER
)) {
793 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY
, path
);
795 log_warning_errno(r
, "Failed to create compat systemd cgroup %s: %m", path
);
801 int cg_create_and_attach(const char *controller
, const char *path
, pid_t pid
) {
806 r
= cg_create(controller
, path
);
810 q
= cg_attach(controller
, path
, pid
);
814 /* This does not remove the cgroup on failure */
818 int cg_attach(const char *controller
, const char *path
, pid_t pid
) {
819 _cleanup_free_
char *fs
= NULL
;
820 char c
[DECIMAL_STR_MAX(pid_t
) + 2];
826 r
= cg_get_path_and_check(controller
, path
, "cgroup.procs", &fs
);
831 pid
= getpid_cached();
833 xsprintf(c
, PID_FMT
"\n", pid
);
835 r
= write_string_file(fs
, c
, 0);
839 r
= cg_hybrid_unified();
843 if (r
> 0 && streq(controller
, SYSTEMD_CGROUP_CONTROLLER
)) {
844 r
= cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY
, path
, pid
);
846 log_warning_errno(r
, "Failed to attach "PID_FMT
" to compat systemd cgroup %s: %m", pid
, path
);
852 int cg_attach_fallback(const char *controller
, const char *path
, pid_t pid
) {
859 r
= cg_attach(controller
, path
, pid
);
861 char prefix
[strlen(path
) + 1];
863 /* This didn't work? Then let's try all prefixes of
866 PATH_FOREACH_PREFIX(prefix
, path
) {
869 q
= cg_attach(controller
, prefix
, pid
);
879 const char *controller
,
889 /* cgroupsv1, aka legacy/non-unified */
890 static const struct Attribute legacy_attributes
[] = {
891 { "cgroup.procs", true },
893 { "cgroup.clone_children", false },
897 /* cgroupsv2, aka unified */
898 static const struct Attribute unified_attributes
[] = {
899 { "cgroup.procs", true },
900 { "cgroup.subtree_control", true },
901 { "cgroup.threads", false },
905 static const struct Attribute
* const attributes
[] = {
906 [false] = legacy_attributes
,
907 [true] = unified_attributes
,
910 _cleanup_free_
char *fs
= NULL
;
911 const struct Attribute
*i
;
916 if (uid
== UID_INVALID
&& gid
== GID_INVALID
)
919 unified
= cg_unified_controller(controller
);
923 /* Configure access to the cgroup itself */
924 r
= cg_get_path(controller
, path
, NULL
, &fs
);
928 r
= chmod_and_chown(fs
, 0755, uid
, gid
);
932 /* Configure access to the cgroup's attributes */
933 for (i
= attributes
[unified
]; i
->name
; i
++) {
936 r
= cg_get_path(controller
, path
, i
->name
, &fs
);
940 r
= chmod_and_chown(fs
, 0644, uid
, gid
);
945 log_debug_errno(r
, "Failed to set access on cgroup %s, ignoring: %m", fs
);
949 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
)) {
950 r
= cg_hybrid_unified();
954 /* Always propagate access mode from unified to legacy controller */
955 r
= cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY
, path
, uid
, gid
);
957 log_debug_errno(r
, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path
);
964 int cg_set_xattr(const char *controller
, const char *path
, const char *name
, const void *value
, size_t size
, int flags
) {
965 _cleanup_free_
char *fs
= NULL
;
970 assert(value
|| size
<= 0);
972 r
= cg_get_path(controller
, path
, NULL
, &fs
);
976 if (setxattr(fs
, name
, value
, size
, flags
) < 0)
982 int cg_get_xattr(const char *controller
, const char *path
, const char *name
, void *value
, size_t size
) {
983 _cleanup_free_
char *fs
= NULL
;
990 r
= cg_get_path(controller
, path
, NULL
, &fs
);
994 n
= getxattr(fs
, name
, value
, size
);
1001 int cg_pid_get_path(const char *controller
, pid_t pid
, char **path
) {
1002 _cleanup_fclose_
FILE *f
= NULL
;
1003 char line
[LINE_MAX
];
1004 const char *fs
, *controller_str
;
1012 if (!cg_controller_is_valid(controller
))
1015 controller
= SYSTEMD_CGROUP_CONTROLLER
;
1017 unified
= cg_unified_controller(controller
);
1021 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
))
1022 controller_str
= SYSTEMD_CGROUP_CONTROLLER_LEGACY
;
1024 controller_str
= controller
;
1026 cs
= strlen(controller_str
);
1029 fs
= procfs_file_alloca(pid
, "cgroup");
1030 f
= fopen(fs
, "re");
1032 return errno
== ENOENT
? -ESRCH
: -errno
;
1034 (void) __fsetlocking(f
, FSETLOCKING_BYCALLER
);
1036 FOREACH_LINE(line
, f
, return -errno
) {
1042 e
= startswith(line
, "0:");
1052 const char *word
, *state
;
1055 l
= strchr(line
, ':');
1065 FOREACH_WORD_SEPARATOR(word
, k
, l
, ",", state
)
1066 if (k
== cs
&& memcmp(word
, controller_str
, cs
) == 0) {
1078 /* Truncate suffix indicating the process is a zombie */
1079 e
= endswith(p
, " (deleted)");
1090 int cg_install_release_agent(const char *controller
, const char *agent
) {
1091 _cleanup_free_
char *fs
= NULL
, *contents
= NULL
;
1097 r
= cg_unified_controller(controller
);
1100 if (r
> 0) /* doesn't apply to unified hierarchy */
1103 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
1107 r
= read_one_line_file(fs
, &contents
);
1111 sc
= strstrip(contents
);
1113 r
= write_string_file(fs
, agent
, 0);
1116 } else if (!path_equal(sc
, agent
))
1120 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
1124 contents
= mfree(contents
);
1125 r
= read_one_line_file(fs
, &contents
);
1129 sc
= strstrip(contents
);
1130 if (streq(sc
, "0")) {
1131 r
= write_string_file(fs
, "1", 0);
1138 if (!streq(sc
, "1"))
1144 int cg_uninstall_release_agent(const char *controller
) {
1145 _cleanup_free_
char *fs
= NULL
;
1148 r
= cg_unified_controller(controller
);
1151 if (r
> 0) /* Doesn't apply to unified hierarchy */
1154 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
1158 r
= write_string_file(fs
, "0", 0);
1164 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
1168 r
= write_string_file(fs
, "", 0);
1175 int cg_is_empty(const char *controller
, const char *path
) {
1176 _cleanup_fclose_
FILE *f
= NULL
;
1182 r
= cg_enumerate_processes(controller
, path
, &f
);
1188 r
= cg_read_pid(f
, &pid
);
1195 int cg_is_empty_recursive(const char *controller
, const char *path
) {
1200 /* The root cgroup is always populated */
1201 if (controller
&& (isempty(path
) || path_equal(path
, "/")))
1204 r
= cg_unified_controller(controller
);
1208 _cleanup_free_
char *t
= NULL
;
1210 /* On the unified hierarchy we can check empty state
1211 * via the "populated" attribute of "cgroup.events". */
1213 r
= cg_read_event(controller
, path
, "populated", &t
);
1217 return streq(t
, "0");
1219 _cleanup_closedir_
DIR *d
= NULL
;
1222 r
= cg_is_empty(controller
, path
);
1226 r
= cg_enumerate_subgroups(controller
, path
, &d
);
1232 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
1233 _cleanup_free_
char *p
= NULL
;
1235 p
= strjoin(path
, "/", fn
);
1240 r
= cg_is_empty_recursive(controller
, p
);
1251 int cg_split_spec(const char *spec
, char **controller
, char **path
) {
1252 char *t
= NULL
, *u
= NULL
;
1258 if (!path_is_normalized(spec
))
1266 *path
= path_kill_slashes(t
);
1275 e
= strchr(spec
, ':');
1277 if (!cg_controller_is_valid(spec
))
1294 t
= strndup(spec
, e
-spec
);
1297 if (!cg_controller_is_valid(t
)) {
1311 if (!path_is_normalized(u
) ||
1312 !path_is_absolute(u
)) {
1318 path_kill_slashes(u
);
1334 int cg_mangle_path(const char *path
, char **result
) {
1335 _cleanup_free_
char *c
= NULL
, *p
= NULL
;
1342 /* First, check if it already is a filesystem path */
1343 if (path_startswith(path
, "/sys/fs/cgroup")) {
1349 *result
= path_kill_slashes(t
);
1353 /* Otherwise, treat it as cg spec */
1354 r
= cg_split_spec(path
, &c
, &p
);
1358 return cg_get_path(c
?: SYSTEMD_CGROUP_CONTROLLER
, p
?: "/", NULL
, result
);
1361 int cg_get_root_path(char **path
) {
1367 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 1, &p
);
1371 e
= endswith(p
, "/" SPECIAL_INIT_SCOPE
);
1373 e
= endswith(p
, "/" SPECIAL_SYSTEM_SLICE
); /* legacy */
1375 e
= endswith(p
, "/system"); /* even more legacy */
1383 int cg_shift_path(const char *cgroup
, const char *root
, const char **shifted
) {
1384 _cleanup_free_
char *rt
= NULL
;
1392 /* If the root was specified let's use that, otherwise
1393 * let's determine it from PID 1 */
1395 r
= cg_get_root_path(&rt
);
1402 p
= path_startswith(cgroup
, root
);
1403 if (p
&& p
> cgroup
)
1411 int cg_pid_get_path_shifted(pid_t pid
, const char *root
, char **cgroup
) {
1412 _cleanup_free_
char *raw
= NULL
;
1419 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &raw
);
1423 r
= cg_shift_path(raw
, root
, &c
);
1428 *cgroup
= TAKE_PTR(raw
);
1442 int cg_path_decode_unit(const char *cgroup
, char **unit
) {
1449 n
= strcspn(cgroup
, "/");
1453 c
= strndupa(cgroup
, n
);
1456 if (!unit_name_is_valid(c
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
1467 static bool valid_slice_name(const char *p
, size_t n
) {
1472 if (n
< STRLEN("x.slice"))
1475 if (memcmp(p
+ n
- 6, ".slice", 6) == 0) {
1481 c
= cg_unescape(buf
);
1483 return unit_name_is_valid(c
, UNIT_NAME_PLAIN
);
1489 static const char *skip_slices(const char *p
) {
1492 /* Skips over all slice assignments */
1497 p
+= strspn(p
, "/");
1499 n
= strcspn(p
, "/");
1500 if (!valid_slice_name(p
, n
))
1507 int cg_path_get_unit(const char *path
, char **ret
) {
1515 e
= skip_slices(path
);
1517 r
= cg_path_decode_unit(e
, &unit
);
1521 /* We skipped over the slices, don't accept any now */
1522 if (endswith(unit
, ".slice")) {
1531 int cg_pid_get_unit(pid_t pid
, char **unit
) {
1532 _cleanup_free_
char *cgroup
= NULL
;
1537 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1541 return cg_path_get_unit(cgroup
, unit
);
1545 * Skip session-*.scope, but require it to be there.
1547 static const char *skip_session(const char *p
) {
1553 p
+= strspn(p
, "/");
1555 n
= strcspn(p
, "/");
1556 if (n
< STRLEN("session-x.scope"))
1559 if (memcmp(p
, "session-", 8) == 0 && memcmp(p
+ n
- 6, ".scope", 6) == 0) {
1560 char buf
[n
- 8 - 6 + 1];
1562 memcpy(buf
, p
+ 8, n
- 8 - 6);
1565 /* Note that session scopes never need unescaping,
1566 * since they cannot conflict with the kernel's own
1567 * names, hence we don't need to call cg_unescape()
1570 if (!session_id_valid(buf
))
1574 p
+= strspn(p
, "/");
1582 * Skip user@*.service, but require it to be there.
1584 static const char *skip_user_manager(const char *p
) {
1590 p
+= strspn(p
, "/");
1592 n
= strcspn(p
, "/");
1593 if (n
< STRLEN("user@x.service"))
1596 if (memcmp(p
, "user@", 5) == 0 && memcmp(p
+ n
- 8, ".service", 8) == 0) {
1597 char buf
[n
- 5 - 8 + 1];
1599 memcpy(buf
, p
+ 5, n
- 5 - 8);
1602 /* Note that user manager services never need unescaping,
1603 * since they cannot conflict with the kernel's own
1604 * names, hence we don't need to call cg_unescape()
1607 if (parse_uid(buf
, NULL
) < 0)
1611 p
+= strspn(p
, "/");
1619 static const char *skip_user_prefix(const char *path
) {
1624 /* Skip slices, if there are any */
1625 e
= skip_slices(path
);
1627 /* Skip the user manager, if it's in the path now... */
1628 t
= skip_user_manager(e
);
1632 /* Alternatively skip the user session if it is in the path... */
1633 return skip_session(e
);
1636 int cg_path_get_user_unit(const char *path
, char **ret
) {
1642 t
= skip_user_prefix(path
);
1646 /* And from here on it looks pretty much the same as for a
1647 * system unit, hence let's use the same parser from here
1649 return cg_path_get_unit(t
, ret
);
1652 int cg_pid_get_user_unit(pid_t pid
, char **unit
) {
1653 _cleanup_free_
char *cgroup
= NULL
;
1658 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1662 return cg_path_get_user_unit(cgroup
, unit
);
1665 int cg_path_get_machine_name(const char *path
, char **machine
) {
1666 _cleanup_free_
char *u
= NULL
;
1670 r
= cg_path_get_unit(path
, &u
);
1674 sl
= strjoina("/run/systemd/machines/unit:", u
);
1675 return readlink_malloc(sl
, machine
);
1678 int cg_pid_get_machine_name(pid_t pid
, char **machine
) {
1679 _cleanup_free_
char *cgroup
= NULL
;
1684 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1688 return cg_path_get_machine_name(cgroup
, machine
);
1691 int cg_path_get_session(const char *path
, char **session
) {
1692 _cleanup_free_
char *unit
= NULL
;
1698 r
= cg_path_get_unit(path
, &unit
);
1702 start
= startswith(unit
, "session-");
1705 end
= endswith(start
, ".scope");
1710 if (!session_id_valid(start
))
1726 int cg_pid_get_session(pid_t pid
, char **session
) {
1727 _cleanup_free_
char *cgroup
= NULL
;
1730 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1734 return cg_path_get_session(cgroup
, session
);
1737 int cg_path_get_owner_uid(const char *path
, uid_t
*uid
) {
1738 _cleanup_free_
char *slice
= NULL
;
1744 r
= cg_path_get_slice(path
, &slice
);
1748 start
= startswith(slice
, "user-");
1751 end
= endswith(start
, ".slice");
1756 if (parse_uid(start
, uid
) < 0)
1762 int cg_pid_get_owner_uid(pid_t pid
, uid_t
*uid
) {
1763 _cleanup_free_
char *cgroup
= NULL
;
1766 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1770 return cg_path_get_owner_uid(cgroup
, uid
);
1773 int cg_path_get_slice(const char *p
, char **slice
) {
1774 const char *e
= NULL
;
1779 /* Finds the right-most slice unit from the beginning, but
1780 * stops before we come to the first non-slice unit. */
1785 p
+= strspn(p
, "/");
1787 n
= strcspn(p
, "/");
1788 if (!valid_slice_name(p
, n
)) {
1793 s
= strdup(SPECIAL_ROOT_SLICE
);
1801 return cg_path_decode_unit(e
, slice
);
1809 int cg_pid_get_slice(pid_t pid
, char **slice
) {
1810 _cleanup_free_
char *cgroup
= NULL
;
1815 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1819 return cg_path_get_slice(cgroup
, slice
);
1822 int cg_path_get_user_slice(const char *p
, char **slice
) {
1827 t
= skip_user_prefix(p
);
1831 /* And now it looks pretty much the same as for a system
1832 * slice, so let's just use the same parser from here on. */
1833 return cg_path_get_slice(t
, slice
);
1836 int cg_pid_get_user_slice(pid_t pid
, char **slice
) {
1837 _cleanup_free_
char *cgroup
= NULL
;
1842 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1846 return cg_path_get_user_slice(cgroup
, slice
);
1849 char *cg_escape(const char *p
) {
1850 bool need_prefix
= false;
1852 /* This implements very minimal escaping for names to be used
1853 * as file names in the cgroup tree: any name which might
1854 * conflict with a kernel name or is prefixed with '_' is
1855 * prefixed with a '_'. That way, when reading cgroup names it
1856 * is sufficient to remove a single prefixing underscore if
1859 /* The return value of this function (unlike cg_unescape())
1862 if (IN_SET(p
[0], 0, '_', '.') ||
1863 streq(p
, "notify_on_release") ||
1864 streq(p
, "release_agent") ||
1865 streq(p
, "tasks") ||
1866 startswith(p
, "cgroup."))
1871 dot
= strrchr(p
, '.');
1876 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1879 n
= cgroup_controller_to_string(c
);
1884 if (memcmp(p
, n
, l
) != 0)
1894 return strappend("_", p
);
1899 char *cg_unescape(const char *p
) {
1902 /* The return value of this function (unlike cg_escape())
1903 * doesn't need free()! */
1911 #define CONTROLLER_VALID \
1915 bool cg_controller_is_valid(const char *p
) {
1921 if (streq(p
, SYSTEMD_CGROUP_CONTROLLER
))
1924 s
= startswith(p
, "name=");
1928 if (IN_SET(*p
, 0, '_'))
1931 for (t
= p
; *t
; t
++)
1932 if (!strchr(CONTROLLER_VALID
, *t
))
1935 if (t
- p
> FILENAME_MAX
)
1941 int cg_slice_to_path(const char *unit
, char **ret
) {
1942 _cleanup_free_
char *p
= NULL
, *s
= NULL
, *e
= NULL
;
1949 if (streq(unit
, SPECIAL_ROOT_SLICE
)) {
1959 if (!unit_name_is_valid(unit
, UNIT_NAME_PLAIN
))
1962 if (!endswith(unit
, ".slice"))
1965 r
= unit_name_to_prefix(unit
, &p
);
1969 dash
= strchr(p
, '-');
1971 /* Don't allow initial dashes */
1976 _cleanup_free_
char *escaped
= NULL
;
1977 char n
[dash
- p
+ sizeof(".slice")];
1979 #if HAS_FEATURE_MEMORY_SANITIZER
1980 /* msan doesn't instrument stpncpy, so it thinks
1981 * n is later used uninitialized:
1982 * https://github.com/google/sanitizers/issues/926
1987 /* Don't allow trailing or double dashes */
1988 if (IN_SET(dash
[1], 0, '-'))
1991 strcpy(stpncpy(n
, p
, dash
- p
), ".slice");
1992 if (!unit_name_is_valid(n
, UNIT_NAME_PLAIN
))
1995 escaped
= cg_escape(n
);
1999 if (!strextend(&s
, escaped
, "/", NULL
))
2002 dash
= strchr(dash
+1, '-');
2005 e
= cg_escape(unit
);
2009 if (!strextend(&s
, e
, NULL
))
2017 int cg_set_attribute(const char *controller
, const char *path
, const char *attribute
, const char *value
) {
2018 _cleanup_free_
char *p
= NULL
;
2021 r
= cg_get_path(controller
, path
, attribute
, &p
);
2025 return write_string_file(p
, value
, 0);
2028 int cg_get_attribute(const char *controller
, const char *path
, const char *attribute
, char **ret
) {
2029 _cleanup_free_
char *p
= NULL
;
2032 r
= cg_get_path(controller
, path
, attribute
, &p
);
2036 return read_one_line_file(p
, ret
);
2039 int cg_get_keyed_attribute(
2040 const char *controller
,
2042 const char *attribute
,
2044 char **ret_values
) {
2046 _cleanup_free_
char *filename
= NULL
, *contents
= NULL
;
2048 size_t n
, i
, n_done
= 0;
2052 /* Reads one or more fields of a cgroupsv2 keyed attribute file. The 'keys' parameter should be an strv with
2053 * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
2054 * entries as 'keys'. On success each entry will be set to the value of the matching key.
2056 * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
2058 r
= cg_get_path(controller
, path
, attribute
, &filename
);
2062 r
= read_full_file(filename
, &contents
, NULL
);
2066 n
= strv_length(keys
);
2067 if (n
== 0) /* No keys to retrieve? That's easy, we are done then */
2070 /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
2071 v
= newa0(char*, n
);
2073 for (p
= contents
; *p
;) {
2074 const char *w
= NULL
;
2076 for (i
= 0; i
< n
; i
++)
2078 w
= first_word(p
, keys
[i
]);
2086 l
= strcspn(w
, NEWLINE
);
2087 v
[i
] = strndup(w
, l
);
2099 p
+= strcspn(p
, NEWLINE
);
2101 p
+= strspn(p
, NEWLINE
);
2107 for (i
= 0; i
< n
; i
++)
2113 memcpy(ret_values
, v
, sizeof(char*) * n
);
2118 int cg_create_everywhere(CGroupMask supported
, CGroupMask mask
, const char *path
) {
2122 /* This one will create a cgroup in our private tree, but also
2123 * duplicate it in the trees specified in mask, and remove it
2126 /* First create the cgroup in our own hierarchy. */
2127 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER
, path
);
2131 /* If we are in the unified hierarchy, we are done now */
2132 r
= cg_all_unified();
2138 /* Otherwise, do the same in the other hierarchies */
2139 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2140 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2143 n
= cgroup_controller_to_string(c
);
2146 (void) cg_create(n
, path
);
2147 else if (supported
& bit
)
2148 (void) cg_trim(n
, path
, true);
2154 int cg_attach_everywhere(CGroupMask supported
, const char *path
, pid_t pid
, cg_migrate_callback_t path_callback
, void *userdata
) {
2158 r
= cg_attach(SYSTEMD_CGROUP_CONTROLLER
, path
, pid
);
2162 r
= cg_all_unified();
2168 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2169 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2170 const char *p
= NULL
;
2172 if (!(supported
& bit
))
2176 p
= path_callback(bit
, userdata
);
2181 (void) cg_attach_fallback(cgroup_controller_to_string(c
), p
, pid
);
2187 int cg_attach_many_everywhere(CGroupMask supported
, const char *path
, Set
* pids
, cg_migrate_callback_t path_callback
, void *userdata
) {
2192 SET_FOREACH(pidp
, pids
, i
) {
2193 pid_t pid
= PTR_TO_PID(pidp
);
2196 q
= cg_attach_everywhere(supported
, path
, pid
, path_callback
, userdata
);
2197 if (q
< 0 && r
>= 0)
2204 int cg_migrate_everywhere(CGroupMask supported
, const char *from
, const char *to
, cg_migrate_callback_t to_callback
, void *userdata
) {
2208 if (!path_equal(from
, to
)) {
2209 r
= cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER
, from
, SYSTEMD_CGROUP_CONTROLLER
, to
, CGROUP_REMOVE
);
2214 q
= cg_all_unified();
2220 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2221 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2222 const char *p
= NULL
;
2224 if (!(supported
& bit
))
2228 p
= to_callback(bit
, userdata
);
2233 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER
, to
, cgroup_controller_to_string(c
), p
, 0);
2239 int cg_trim_everywhere(CGroupMask supported
, const char *path
, bool delete_root
) {
2243 r
= cg_trim(SYSTEMD_CGROUP_CONTROLLER
, path
, delete_root
);
2247 q
= cg_all_unified();
2253 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2254 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2256 if (!(supported
& bit
))
2259 (void) cg_trim(cgroup_controller_to_string(c
), path
, delete_root
);
2265 int cg_mask_to_string(CGroupMask mask
, char **ret
) {
2266 _cleanup_free_
char *s
= NULL
;
2267 size_t n
= 0, allocated
= 0;
2278 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2282 if (!(mask
& CGROUP_CONTROLLER_TO_MASK(c
)))
2285 k
= cgroup_controller_to_string(c
);
2288 if (!GREEDY_REALLOC(s
, allocated
, n
+ space
+ l
+ 1))
2293 memcpy(s
+ n
+ space
, k
, l
);
2307 int cg_mask_from_string(const char *value
, CGroupMask
*mask
) {
2312 _cleanup_free_
char *n
= NULL
;
2316 r
= extract_first_word(&value
, &n
, NULL
, 0);
2322 v
= cgroup_controller_from_string(n
);
2326 *mask
|= CGROUP_CONTROLLER_TO_MASK(v
);
/* Computes the mask of supported controllers, choosing the method based on cg_all_unified():
 *
 *   - one path reads the "cgroup.controllers" attribute of the root cgroup (cg_get_root_path()), parses
 *     it with cg_mask_from_string() and keeps only the cpu, memory, io and pids bits;
 *   - the other path sets the bit of every controller for which controller_is_accessible() returns a
 *     non-negative value.
 *
 * NOTE(review): the final store into *ret is not visible in this excerpt. */
2331 int cg_mask_supported(CGroupMask
*ret
) {
2332 CGroupMask mask
= 0;
2335 /* Determines the mask of supported cgroup controllers. Only
2336 * includes controllers we can make sense of and that are
2337 * actually accessible. */
2339 r
= cg_all_unified();
2343 _cleanup_free_
char *root
= NULL
, *controllers
= NULL
, *path
= NULL
;
2345 /* In the unified hierarchy we can read the supported
2346 * and accessible controllers from the top-level
2347 * cgroup attribute */
2349 r
= cg_get_root_path(&root
);
2353 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, root
, "cgroup.controllers", &path
);
2357 r
= read_one_line_file(path
, &controllers
);
2361 r
= cg_mask_from_string(controllers
, &mask
);
2365 /* Currently, we support the cpu, memory, io and pids
2366 * controller in the unified hierarchy, mask
2367 * everything else off. */
2368 mask
&= CGROUP_MASK_CPU
| CGROUP_MASK_MEMORY
| CGROUP_MASK_IO
| CGROUP_MASK_PIDS
;
2373 /* In the legacy hierarchy, we check which
2374 * hierarchies are mounted. */
2376 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2379 n
= cgroup_controller_to_string(c
);
2380 if (controller_is_accessible(n
) >= 0)
2381 mask
|= CGROUP_CONTROLLER_TO_MASK(c
);
/* Collects the controller names listed in /proc/cgroups into a newly allocated string Set.
 *
 * The first line of the file is read and discarded as a header. Every following entry is parsed with
 * fscanf() as a name (allocated by "%ms"), two skipped integers and an "enabled" integer. Names are
 * checked with cg_controller_is_valid() and added with set_consume().
 *
 * NOTE(review): what happens for ENOENT from fopen(), for disabled or invalid entries, and how the set
 * is handed to *ret is not visible in this excerpt. */
2389 int cg_kernel_controllers(Set
**ret
) {
2390 _cleanup_set_free_free_ Set
*controllers
= NULL
;
2391 _cleanup_fclose_
FILE *f
= NULL
;
2396 /* Determines the full list of kernel-known controllers. Might
2397 * include controllers we don't actually support, arbitrary
2398 * named hierarchies and controllers that aren't currently
2399 * accessible (because not mounted). */
2401 controllers
= set_new(&string_hash_ops
);
2405 f
= fopen("/proc/cgroups", "re");
2407 if (errno
== ENOENT
) {
/* The stream is only used by this caller, hence stdio's internal locking is switched off. */
2415 (void) __fsetlocking(f
, FSETLOCKING_BYCALLER
);
2417 /* Ignore the header line */
2418 (void) read_line(f
, (size_t) -1, NULL
);
/* Two conversions are stored (name and enabled flag), hence the comparison against 2. */
2425 if (fscanf(f
, "%ms %*i %*i %i", &controller
, &enabled
) != 2) {
2430 if (ferror(f
) && errno
> 0)
2441 if (!cg_controller_is_valid(controller
)) {
2446 r
= set_consume(controllers
, controller
);
/* Per-thread cache of the detected cgroup hierarchy mode. Starts out as CGROUP_UNIFIED_UNKNOWN, is filled
 * in by cg_unified_update() and reset to CGROUP_UNIFIED_UNKNOWN by cg_unified_flush(). */
2457 static thread_local CGroupUnified unified_cache
= CGROUP_UNIFIED_UNKNOWN
;
2459 /* The hybrid mode was initially implemented in v232 and simply mounted cgroup v2 on /sys/fs/cgroup/systemd. This
2460 * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
2461 * /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts v2 on /sys/fs/cgroup/unified and maintains
2462 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
2464 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep cgroup v2
2465 * process management but disable the compat dual layout, we return %true on
2466 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
/* Per-thread flag written by cg_unified_update(): true when cgroup2 was found on /sys/fs/cgroup/systemd
 * (the v232 layout), false when it was found on /sys/fs/cgroup/unified. Read by cg_hybrid_unified(). */
2468 static thread_local
bool unified_systemd_v232
;
/* Detects the cgroup setup via statfs() and stores the result in unified_cache (and, for the partially
 * unified case, unified_systemd_v232). Nothing is probed when unified_cache already holds a value
 * >= CGROUP_UNIFIED_NONE.
 *
 * Decision table, as implemented below:
 *   /sys/fs/cgroup/ is cgroup2                      -> CGROUP_UNIFIED_ALL
 *   /sys/fs/cgroup/ is tmpfs, and
 *       /sys/fs/cgroup/unified/ is cgroup2          -> CGROUP_UNIFIED_SYSTEMD, unified_systemd_v232 = false
 *       /sys/fs/cgroup/systemd/ is cgroup2          -> CGROUP_UNIFIED_SYSTEMD, unified_systemd_v232 = true
 *       /sys/fs/cgroup/systemd/ is cgroup (v1)      -> CGROUP_UNIFIED_NONE
 *       /sys/fs/cgroup/systemd/ is anything else    -> CGROUP_UNIFIED_NONE (logged as unexpected)
 *   /sys/fs/cgroup/ is anything else                -> logged as unknown; unified_cache is not assigned
 *                                                      in the visible code
 *
 * statfs() failures on /sys/fs/cgroup/ and /sys/fs/cgroup/systemd/ are returned via log_debug_errno().
 *
 * NOTE(review): the format string used when statfs() on /sys/fs/cgroup/systemd/ fails has an unbalanced
 * parenthesis ("statfs(\"/sys/fs/cgroup/systemd\" failed: %m"); unlike the message for /sys/fs/cgroup/
 * it lacks the closing ")" after the quoted path. Cosmetic, but worth fixing in the log text. */
2470 static int cg_unified_update(void) {
2474 /* Checks if we support the unified hierarchy. Returns an
2475 * error when the cgroup hierarchies aren't mounted yet or we
2476 * have any other trouble determining if the unified hierarchy
2479 if (unified_cache
>= CGROUP_UNIFIED_NONE
)
2482 if (statfs("/sys/fs/cgroup/", &fs
) < 0)
2483 return log_debug_errno(errno
, "statfs(\"/sys/fs/cgroup/\") failed: %m");
2485 if (F_TYPE_EQUAL(fs
.f_type
, CGROUP2_SUPER_MAGIC
)) {
2486 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
2487 unified_cache
= CGROUP_UNIFIED_ALL
;
2488 } else if (F_TYPE_EQUAL(fs
.f_type
, TMPFS_MAGIC
)) {
2489 if (statfs("/sys/fs/cgroup/unified/", &fs
) == 0 &&
2490 F_TYPE_EQUAL(fs
.f_type
, CGROUP2_SUPER_MAGIC
)) {
2491 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2492 unified_cache
= CGROUP_UNIFIED_SYSTEMD
;
2493 unified_systemd_v232
= false;
2495 if (statfs("/sys/fs/cgroup/systemd/", &fs
) < 0)
2496 return log_debug_errno(errno
, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
2498 if (F_TYPE_EQUAL(fs
.f_type
, CGROUP2_SUPER_MAGIC
)) {
2499 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2500 unified_cache
= CGROUP_UNIFIED_SYSTEMD
;
2501 unified_systemd_v232
= true;
2502 } else if (F_TYPE_EQUAL(fs
.f_type
, CGROUP_SUPER_MAGIC
)) {
2503 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2504 unified_cache
= CGROUP_UNIFIED_NONE
;
2506 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
2507 (unsigned long long) fs
.f_type
);
2508 unified_cache
= CGROUP_UNIFIED_NONE
;
2512 log_debug("Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2513 (unsigned long long) fs
.f_type
);
/* Reports whether 'controller' lives on the unified hierarchy. Refreshes the cached state with
 * cg_unified_update() first, then special-cases CGROUP_UNIFIED_NONE and >= CGROUP_UNIFIED_ALL. In the
 * remaining case the answer is whether 'controller' equals SYSTEMD_CGROUP_CONTROLLER (streq_ptr()).
 *
 * NOTE(review): the values returned by the two special cases and the handling of a failed update are not
 * visible in this excerpt. */
2520 int cg_unified_controller(const char *controller
) {
2523 r
= cg_unified_update();
2527 if (unified_cache
== CGROUP_UNIFIED_NONE
)
2530 if (unified_cache
>= CGROUP_UNIFIED_ALL
)
2533 return streq_ptr(controller
, SYSTEMD_CGROUP_CONTROLLER
);
/* Refreshes the cached state with cg_unified_update() and returns whether unified_cache is at least
 * CGROUP_UNIFIED_ALL.
 *
 * NOTE(review): the handling of a failed cg_unified_update() is not visible in this excerpt. */
2536 int cg_all_unified(void) {
2539 r
= cg_unified_update();
2543 return unified_cache
>= CGROUP_UNIFIED_ALL
;
/* Refreshes the cached state with cg_unified_update() and returns whether unified_cache equals
 * CGROUP_UNIFIED_SYSTEMD while the v232 layout flag (unified_systemd_v232) is not set.
 *
 * NOTE(review): the handling of a failed cg_unified_update() is not visible in this excerpt. */
2546 int cg_hybrid_unified(void) {
2549 r
= cg_unified_update();
2553 return unified_cache
== CGROUP_UNIFIED_SYSTEMD
&& !unified_systemd_v232
;
/* Drops the cached detection result by resetting unified_cache to CGROUP_UNIFIED_UNKNOWN, then runs the
 * detection again and returns the result of cg_unified_update(). */
2556 int cg_unified_flush(void) {
2557 unified_cache
= CGROUP_UNIFIED_UNKNOWN
;
2559 return cg_unified_update();
/* For cgroup 'p', builds one string per controller in 'supported' consisting of '+' (bit set in 'mask')
 * or '-' (bit not set) followed by the controller name, and writes it to the "cgroup.subtree_control"
 * attribute file of 'p' with write_string_stream(). The file is opened with fopen(fs, "we").
 *
 * When cg_all_unified() returns 0 the function takes an early path, since the legacy hierarchy has no
 * notion of joining controllers. Failures to open the file or to write an entry are logged at debug
 * level.
 *
 * NOTE(review): whether the file is opened once or per controller, and what is returned, is not visible
 * in this excerpt. */
2562 int cg_enable_everywhere(CGroupMask supported
, CGroupMask mask
, const char *p
) {
2563 _cleanup_fclose_
FILE *f
= NULL
;
2564 _cleanup_free_
char *fs
= NULL
;
2573 r
= cg_all_unified();
2576 if (r
== 0) /* on the legacy hierarchy there's no joining of controllers defined */
2579 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, p
, "cgroup.subtree_control", &fs
);
2583 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2584 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2587 if (!(supported
& bit
))
2590 n
= cgroup_controller_to_string(c
);
/* One byte for the '+'/'-' prefix, the name itself, and one byte for the trailing NUL. */
2592 char s
[1 + strlen(n
) + 1];
2594 s
[0] = mask
& bit
? '+' : '-';
2598 f
= fopen(fs
, "we");
2600 log_debug_errno(errno
, "Failed to open cgroup.subtree_control file of %s: %m", p
);
2605 r
= write_string_stream(f
, s
, 0);
2607 log_debug_errno(r
, "Failed to enable controller %s for %s (%s): %m", n
, p
, fs
);
/* Tells whether the fully unified hierarchy is wanted. The answer is cached per thread in 'wanted'
 * (-1 means "not determined yet").
 *
 * If cg_unified_flush() succeeds, the answer is whether the detected mode is at least
 * CGROUP_UNIFIED_ALL. Otherwise the boolean kernel command line option
 * "systemd.unified_cgroup_hierarchy" decides; if proc_cmdline_get_bool() does not return a positive
 * value, the compile-time default (DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL) is used. */
2614 bool cg_is_unified_wanted(void) {
2615 static thread_local
int wanted
= -1;
2618 const bool is_default
= DEFAULT_HIERARCHY
== CGROUP_UNIFIED_ALL
;
2620 /* If we have a cached value, return that. */
2624 /* If the hierarchy is already mounted, then follow whatever
2625 * was chosen for it. */
2626 if (cg_unified_flush() >= 0)
2627 return (wanted
= unified_cache
>= CGROUP_UNIFIED_ALL
);
2629 /* Otherwise, let's see what the kernel command line has to say.
2630 * Since checking is expensive, cache a non-error result. */
2631 r
= proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b
);
2633 return (wanted
= r
> 0 ? b
: is_default
);
/* Tells whether (at least partial) legacy hierarchy support is wanted. The answer is cached per thread
 * in 'wanted' (-1 means "not determined yet").
 *
 * Returns false only if cg_unified_flush() succeeds and the detected mode is exactly
 * CGROUP_UNIFIED_ALL; in every other case true is cached and returned. */
2636 bool cg_is_legacy_wanted(void) {
2637 static thread_local
int wanted
= -1;
2639 /* If we have a cached value, return that. */
2643 /* Check if we have cgroups2 already mounted. */
2644 if (cg_unified_flush() >= 0 &&
2645 unified_cache
== CGROUP_UNIFIED_ALL
)
2646 return (wanted
= false);
2648 /* Otherwise, assume that at least partial legacy is wanted,
2649 * since cgroups2 should already be mounted at this point. */
2650 return (wanted
= true);
/* Tells whether the hybrid setup is wanted. The answer is cached per thread in 'wanted' (-1 means "not
 * determined yet").
 *
 * Returns false if cg_unified_flush() succeeds and the detected mode is exactly CGROUP_UNIFIED_ALL.
 * Otherwise the boolean kernel command line option "systemd.legacy_systemd_cgroup_controller" decides,
 * with inverted meaning; if proc_cmdline_get_bool() does not return a positive value, the compile-time
 * default (DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD) is used. */
2653 bool cg_is_hybrid_wanted(void) {
2654 static thread_local
int wanted
= -1;
2657 const bool is_default
= DEFAULT_HIERARCHY
>= CGROUP_UNIFIED_SYSTEMD
;
2658 /* We default to true if the default is "hybrid", obviously,
2659 * but also when the default is "unified", because if we get
2660 * called, it means that unified hierarchy was not mounted. */
2662 /* If we have a cached value, return that. */
2666 /* If the hierarchy is already mounted, then follow whatever
2667 * was chosen for it. */
2668 if (cg_unified_flush() >= 0 &&
2669 unified_cache
== CGROUP_UNIFIED_ALL
)
2670 return (wanted
= false);
2672 /* Otherwise, let's see what the kernel command line has to say.
2673 * Since checking is expensive, cache a non-error result. */
2674 r
= proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b
);
2676 /* The meaning of the kernel option is reversed with respect to the return value
2677 * of this function, hence the negation. */
2678 return (wanted
= r
> 0 ? !b
: is_default
);
/* Parses the string 's' as an unsigned 64-bit cgroup weight with safe_atou64() and checks it against the
 * range CGROUP_WEIGHT_MIN..CGROUP_WEIGHT_MAX. One code path stores CGROUP_WEIGHT_INVALID in *ret.
 *
 * NOTE(review): the condition guarding the CGROUP_WEIGHT_INVALID store, the error codes and the store of
 * the parsed value are not visible in this excerpt. */
2681 int cg_weight_parse(const char *s
, uint64_t *ret
) {
2686 *ret
= CGROUP_WEIGHT_INVALID
;
2690 r
= safe_atou64(s
, &u
);
/* Values outside the permitted weight range are handled separately. */
2694 if (u
< CGROUP_WEIGHT_MIN
|| u
> CGROUP_WEIGHT_MAX
)
/* Default value for each IO limit type, indexed by CGroupIOLimitType: all four limits (read/write
 * bandwidth, read/write IOPS) default to CGROUP_LIMIT_MAX. */
2701 const uint64_t cgroup_io_limit_defaults
[_CGROUP_IO_LIMIT_TYPE_MAX
] = {
2702 [CGROUP_IO_RBPS_MAX
] = CGROUP_LIMIT_MAX
,
2703 [CGROUP_IO_WBPS_MAX
] = CGROUP_LIMIT_MAX
,
2704 [CGROUP_IO_RIOPS_MAX
] = CGROUP_LIMIT_MAX
,
2705 [CGROUP_IO_WIOPS_MAX
] = CGROUP_LIMIT_MAX
,
/* Name of each IO limit type, indexed by CGroupIOLimitType. DEFINE_STRING_TABLE_LOOKUP() below generates
 * the lookup helpers for the "cgroup_io_limit_type" prefix from this table. */
2708 static const char* const cgroup_io_limit_type_table
[_CGROUP_IO_LIMIT_TYPE_MAX
] = {
2709 [CGROUP_IO_RBPS_MAX
] = "IOReadBandwidthMax",
2710 [CGROUP_IO_WBPS_MAX
] = "IOWriteBandwidthMax",
2711 [CGROUP_IO_RIOPS_MAX
] = "IOReadIOPSMax",
2712 [CGROUP_IO_WIOPS_MAX
] = "IOWriteIOPSMax",
2715 DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type
, CGroupIOLimitType
);
/* Parses the string 's' as an unsigned 64-bit CPU shares value with safe_atou64() and checks it against
 * the range CGROUP_CPU_SHARES_MIN..CGROUP_CPU_SHARES_MAX. One code path stores
 * CGROUP_CPU_SHARES_INVALID in *ret.
 *
 * NOTE(review): the condition guarding the CGROUP_CPU_SHARES_INVALID store, the error codes and the
 * store of the parsed value are not visible in this excerpt. */
2717 int cg_cpu_shares_parse(const char *s
, uint64_t *ret
) {
2722 *ret
= CGROUP_CPU_SHARES_INVALID
;
2726 r
= safe_atou64(s
, &u
);
/* Values outside the permitted shares range are handled separately. */
2730 if (u
< CGROUP_CPU_SHARES_MIN
|| u
> CGROUP_CPU_SHARES_MAX
)
/* Parses the string 's' as an unsigned 64-bit blkio weight with safe_atou64() and checks it against the
 * range CGROUP_BLKIO_WEIGHT_MIN..CGROUP_BLKIO_WEIGHT_MAX. One code path stores
 * CGROUP_BLKIO_WEIGHT_INVALID in *ret.
 *
 * NOTE(review): the condition guarding the CGROUP_BLKIO_WEIGHT_INVALID store, the error codes and the
 * store of the parsed value are not visible in this excerpt. */
2737 int cg_blkio_weight_parse(const char *s
, uint64_t *ret
) {
2742 *ret
= CGROUP_BLKIO_WEIGHT_INVALID
;
2746 r
= safe_atou64(s
, &u
);
/* Values outside the permitted weight range are handled separately. */
2750 if (u
< CGROUP_BLKIO_WEIGHT_MIN
|| u
> CGROUP_BLKIO_WEIGHT_MAX
)
/* Returns true if the statfs result 's' describes a cgroup file system of either flavour, i.e. if
 * is_fs_type() matches CGROUP_SUPER_MAGIC or CGROUP2_SUPER_MAGIC. */
2757 bool is_cgroup_fs(const struct statfs
*s
) {
2758 return is_fs_type(s
, CGROUP_SUPER_MAGIC
) ||
2759 is_fs_type(s
, CGROUP2_SUPER_MAGIC
);
/* Runs fstatfs() on 'fd' and returns whether the file system it refers to is a cgroup file system
 * according to is_cgroup_fs().
 *
 * NOTE(review): the value returned when fstatfs() fails is not visible in this excerpt. */
2762 bool fd_is_cgroup_fs(int fd
) {
2765 if (fstatfs(fd
, &s
) < 0)
2768 return is_cgroup_fs(&s
);
/* Name of each controller, indexed by CGroupController. DEFINE_STRING_TABLE_LOOKUP() below generates the
 * lookup helpers for the "cgroup_controller" prefix (cgroup_controller_to_string() and
 * cgroup_controller_from_string() are used elsewhere in this file) from this table.
 *
 * NOTE(review): unlike cgroup_io_limit_type_table, the array elements here are declared as
 * "const char *" rather than "const char* const", so the pointers themselves are writable -- consider
 * adding the second const for consistency. */
2771 static const char *cgroup_controller_table
[_CGROUP_CONTROLLER_MAX
] = {
2772 [CGROUP_CONTROLLER_CPU
] = "cpu",
2773 [CGROUP_CONTROLLER_CPUACCT
] = "cpuacct",
2774 [CGROUP_CONTROLLER_IO
] = "io",
2775 [CGROUP_CONTROLLER_BLKIO
] = "blkio",
2776 [CGROUP_CONTROLLER_MEMORY
] = "memory",
2777 [CGROUP_CONTROLLER_DEVICES
] = "devices",
2778 [CGROUP_CONTROLLER_PIDS
] = "pids",
2781 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller
, CGroupController
);