2 This file is part of systemd.
4 Copyright 2010 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/statfs.h>
30 #include <sys/types.h>
33 #include "alloc-util.h"
34 #include "cgroup-util.h"
36 #include "dirent-util.h"
37 #include "extract-word.h"
40 #include "formats-util.h"
43 #include "login-util.h"
47 #include "parse-util.h"
48 #include "path-util.h"
49 #include "proc-cmdline.h"
50 #include "process-util.h"
53 #include "stat-util.h"
54 #include "stdio-util.h"
55 #include "string-table.h"
56 #include "string-util.h"
57 #include "unit-name.h"
58 #include "user-util.h"
60 int cg_enumerate_processes(const char *controller
, const char *path
, FILE **_f
) {
61 _cleanup_free_
char *fs
= NULL
;
67 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
79 int cg_read_pid(FILE *f
, pid_t
*_pid
) {
82 /* Note that the cgroup.procs might contain duplicates! See
83 * cgroups.txt for details. */
89 if (fscanf(f
, "%lu", &ul
) != 1) {
94 return errno
> 0 ? -errno
: -EIO
;
104 int cg_enumerate_subgroups(const char *controller
, const char *path
, DIR **_d
) {
105 _cleanup_free_
char *fs
= NULL
;
111 /* This is not recursive! */
113 r
= cg_get_path(controller
, path
, NULL
, &fs
);
125 int cg_read_subgroup(DIR *d
, char **fn
) {
131 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
134 if (de
->d_type
!= DT_DIR
)
137 if (streq(de
->d_name
, ".") ||
138 streq(de
->d_name
, ".."))
141 b
= strdup(de
->d_name
);
152 int cg_rmdir(const char *controller
, const char *path
) {
153 _cleanup_free_
char *p
= NULL
;
156 r
= cg_get_path(controller
, path
, NULL
, &p
);
161 if (r
< 0 && errno
!= ENOENT
)
167 int cg_kill(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, Set
*s
) {
168 _cleanup_set_free_ Set
*allocated_set
= NULL
;
175 /* This goes through the tasks list and kills them all. This
176 * is repeated until no further processes are added to the
177 * tasks list, to properly handle forking processes */
180 s
= allocated_set
= set_new(NULL
);
188 _cleanup_fclose_
FILE *f
= NULL
;
192 r
= cg_enumerate_processes(controller
, path
, &f
);
194 if (ret
>= 0 && r
!= -ENOENT
)
200 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
202 if (ignore_self
&& pid
== my_pid
)
205 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
208 /* If we haven't killed this process yet, kill
210 if (kill(pid
, sig
) < 0) {
211 if (ret
>= 0 && errno
!= ESRCH
)
214 if (sigcont
&& sig
!= SIGKILL
)
215 (void) kill(pid
, SIGCONT
);
223 r
= set_put(s
, PID_TO_PTR(pid
));
239 /* To avoid racing against processes which fork
240 * quicker than we can kill them we repeat this until
241 * no new pids need to be killed. */
248 int cg_kill_recursive(const char *controller
, const char *path
, int sig
, bool sigcont
, bool ignore_self
, bool rem
, Set
*s
) {
249 _cleanup_set_free_ Set
*allocated_set
= NULL
;
250 _cleanup_closedir_
DIR *d
= NULL
;
258 s
= allocated_set
= set_new(NULL
);
263 ret
= cg_kill(controller
, path
, sig
, sigcont
, ignore_self
, s
);
265 r
= cg_enumerate_subgroups(controller
, path
, &d
);
267 if (ret
>= 0 && r
!= -ENOENT
)
273 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
274 _cleanup_free_
char *p
= NULL
;
276 p
= strjoin(path
, "/", fn
, NULL
);
281 r
= cg_kill_recursive(controller
, p
, sig
, sigcont
, ignore_self
, rem
, s
);
282 if (r
!= 0 && ret
>= 0)
286 if (ret
>= 0 && r
< 0)
290 r
= cg_rmdir(controller
, path
);
291 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
298 int cg_migrate(const char *cfrom
, const char *pfrom
, const char *cto
, const char *pto
, bool ignore_self
) {
300 _cleanup_set_free_ Set
*s
= NULL
;
316 _cleanup_fclose_
FILE *f
= NULL
;
320 r
= cg_enumerate_processes(cfrom
, pfrom
, &f
);
322 if (ret
>= 0 && r
!= -ENOENT
)
328 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
330 /* This might do weird stuff if we aren't a
331 * single-threaded program. However, we
332 * luckily know that we are single-threaded. */
333 if (ignore_self
&& pid
== my_pid
)
336 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
339 /* Ignore kernel threads. Since they can only
340 * exist in the root cgroup, we only check for
343 (isempty(pfrom
) || path_equal(pfrom
, "/")) &&
344 is_kernel_thread(pid
) > 0)
347 r
= cg_attach(cto
, pto
, pid
);
349 if (ret
>= 0 && r
!= -ESRCH
)
356 r
= set_put(s
, PID_TO_PTR(pid
));
376 int cg_migrate_recursive(
384 _cleanup_closedir_
DIR *d
= NULL
;
393 ret
= cg_migrate(cfrom
, pfrom
, cto
, pto
, ignore_self
);
395 r
= cg_enumerate_subgroups(cfrom
, pfrom
, &d
);
397 if (ret
>= 0 && r
!= -ENOENT
)
403 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
404 _cleanup_free_
char *p
= NULL
;
406 p
= strjoin(pfrom
, "/", fn
, NULL
);
411 r
= cg_migrate_recursive(cfrom
, p
, cto
, pto
, ignore_self
, rem
);
412 if (r
!= 0 && ret
>= 0)
416 if (r
< 0 && ret
>= 0)
420 r
= cg_rmdir(cfrom
, pfrom
);
421 if (r
< 0 && ret
>= 0 && r
!= -ENOENT
&& r
!= -EBUSY
)
428 int cg_migrate_recursive_fallback(
443 r
= cg_migrate_recursive(cfrom
, pfrom
, cto
, pto
, ignore_self
, rem
);
445 char prefix
[strlen(pto
) + 1];
447 /* This didn't work? Then let's try all prefixes of the destination */
449 PATH_FOREACH_PREFIX(prefix
, pto
) {
452 q
= cg_migrate_recursive(cfrom
, pfrom
, cto
, prefix
, ignore_self
, rem
);
461 static const char *controller_to_dirname(const char *controller
) {
466 /* Converts a controller name to the directory name below
467 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
468 * just cuts off the name= prefix used for named
469 * hierarchies, if it is specified. */
471 e
= startswith(controller
, "name=");
478 static int join_path_legacy(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
485 dn
= controller_to_dirname(controller
);
487 if (isempty(path
) && isempty(suffix
))
488 t
= strappend("/sys/fs/cgroup/", dn
);
489 else if (isempty(path
))
490 t
= strjoin("/sys/fs/cgroup/", dn
, "/", suffix
, NULL
);
491 else if (isempty(suffix
))
492 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, NULL
);
494 t
= strjoin("/sys/fs/cgroup/", dn
, "/", path
, "/", suffix
, NULL
);
502 static int join_path_unified(const char *path
, const char *suffix
, char **fs
) {
507 if (isempty(path
) && isempty(suffix
))
508 t
= strdup("/sys/fs/cgroup");
509 else if (isempty(path
))
510 t
= strappend("/sys/fs/cgroup/", suffix
);
511 else if (isempty(suffix
))
512 t
= strappend("/sys/fs/cgroup/", path
);
514 t
= strjoin("/sys/fs/cgroup/", path
, "/", suffix
, NULL
);
522 int cg_get_path(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
530 /* If no controller is specified, we return the path
531 * *below* the controllers, without any prefix. */
533 if (!path
&& !suffix
)
541 t
= strjoin(path
, "/", suffix
, NULL
);
545 *fs
= path_kill_slashes(t
);
549 if (!cg_controller_is_valid(controller
))
552 unified
= cg_unified();
557 r
= join_path_unified(path
, suffix
, fs
);
559 r
= join_path_legacy(controller
, path
, suffix
, fs
);
563 path_kill_slashes(*fs
);
567 static int controller_is_accessible(const char *controller
) {
572 /* Checks whether a specific controller is accessible,
573 * i.e. its hierarchy mounted. In the unified hierarchy all
574 * controllers are considered accessible, except for the named
577 if (!cg_controller_is_valid(controller
))
580 unified
= cg_unified();
584 /* We don't support named hierarchies if we are using
585 * the unified hierarchy. */
587 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
))
590 if (startswith(controller
, "name="))
596 dn
= controller_to_dirname(controller
);
597 cc
= strjoina("/sys/fs/cgroup/", dn
);
599 if (laccess(cc
, F_OK
) < 0)
606 int cg_get_path_and_check(const char *controller
, const char *path
, const char *suffix
, char **fs
) {
612 /* Check if the specified controller is actually accessible */
613 r
= controller_is_accessible(controller
);
617 return cg_get_path(controller
, path
, suffix
, fs
);
620 static int trim_cb(const char *path
, const struct stat
*sb
, int typeflag
, struct FTW
*ftwbuf
) {
625 if (typeflag
!= FTW_DP
)
628 if (ftwbuf
->level
< 1)
635 int cg_trim(const char *controller
, const char *path
, bool delete_root
) {
636 _cleanup_free_
char *fs
= NULL
;
641 r
= cg_get_path(controller
, path
, NULL
, &fs
);
646 if (nftw(fs
, trim_cb
, 64, FTW_DEPTH
|FTW_MOUNT
|FTW_PHYS
) != 0) {
656 if (rmdir(fs
) < 0 && errno
!= ENOENT
)
663 int cg_create(const char *controller
, const char *path
) {
664 _cleanup_free_
char *fs
= NULL
;
667 r
= cg_get_path_and_check(controller
, path
, NULL
, &fs
);
671 r
= mkdir_parents(fs
, 0755);
675 if (mkdir(fs
, 0755) < 0) {
686 int cg_create_and_attach(const char *controller
, const char *path
, pid_t pid
) {
691 r
= cg_create(controller
, path
);
695 q
= cg_attach(controller
, path
, pid
);
699 /* This does not remove the cgroup on failure */
703 int cg_attach(const char *controller
, const char *path
, pid_t pid
) {
704 _cleanup_free_
char *fs
= NULL
;
705 char c
[DECIMAL_STR_MAX(pid_t
) + 2];
711 r
= cg_get_path_and_check(controller
, path
, "cgroup.procs", &fs
);
718 xsprintf(c
, PID_FMT
"\n", pid
);
720 return write_string_file(fs
, c
, 0);
723 int cg_attach_fallback(const char *controller
, const char *path
, pid_t pid
) {
730 r
= cg_attach(controller
, path
, pid
);
732 char prefix
[strlen(path
) + 1];
734 /* This didn't work? Then let's try all prefixes of
737 PATH_FOREACH_PREFIX(prefix
, path
) {
740 q
= cg_attach(controller
, prefix
, pid
);
749 int cg_set_group_access(
750 const char *controller
,
756 _cleanup_free_
char *fs
= NULL
;
759 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
762 if (mode
!= MODE_INVALID
)
765 r
= cg_get_path(controller
, path
, NULL
, &fs
);
769 return chmod_and_chown(fs
, mode
, uid
, gid
);
772 int cg_set_task_access(
773 const char *controller
,
779 _cleanup_free_
char *fs
= NULL
, *procs
= NULL
;
784 if (mode
== MODE_INVALID
&& uid
== UID_INVALID
&& gid
== GID_INVALID
)
787 if (mode
!= MODE_INVALID
)
790 r
= cg_get_path(controller
, path
, "cgroup.procs", &fs
);
794 r
= chmod_and_chown(fs
, mode
, uid
, gid
);
798 unified
= cg_unified();
804 /* Compatibility, Always keep values for "tasks" in sync with
806 if (cg_get_path(controller
, path
, "tasks", &procs
) >= 0)
807 (void) chmod_and_chown(procs
, mode
, uid
, gid
);
812 int cg_pid_get_path(const char *controller
, pid_t pid
, char **path
) {
813 _cleanup_fclose_
FILE *f
= NULL
;
822 unified
= cg_unified();
827 if (!cg_controller_is_valid(controller
))
830 controller
= SYSTEMD_CGROUP_CONTROLLER
;
832 cs
= strlen(controller
);
835 fs
= procfs_file_alloca(pid
, "cgroup");
838 return errno
== ENOENT
? -ESRCH
: -errno
;
840 FOREACH_LINE(line
, f
, return -errno
) {
846 e
= startswith(line
, "0:");
856 const char *word
, *state
;
859 l
= strchr(line
, ':');
869 FOREACH_WORD_SEPARATOR(word
, k
, l
, ",", state
) {
870 if (k
== cs
&& memcmp(word
, controller
, cs
) == 0) {
891 int cg_install_release_agent(const char *controller
, const char *agent
) {
892 _cleanup_free_
char *fs
= NULL
, *contents
= NULL
;
898 unified
= cg_unified();
901 if (unified
) /* doesn't apply to unified hierarchy */
904 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
908 r
= read_one_line_file(fs
, &contents
);
912 sc
= strstrip(contents
);
914 r
= write_string_file(fs
, agent
, 0);
917 } else if (!path_equal(sc
, agent
))
921 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
925 contents
= mfree(contents
);
926 r
= read_one_line_file(fs
, &contents
);
930 sc
= strstrip(contents
);
931 if (streq(sc
, "0")) {
932 r
= write_string_file(fs
, "1", 0);
945 int cg_uninstall_release_agent(const char *controller
) {
946 _cleanup_free_
char *fs
= NULL
;
949 unified
= cg_unified();
952 if (unified
) /* Doesn't apply to unified hierarchy */
955 r
= cg_get_path(controller
, NULL
, "notify_on_release", &fs
);
959 r
= write_string_file(fs
, "0", 0);
965 r
= cg_get_path(controller
, NULL
, "release_agent", &fs
);
969 r
= write_string_file(fs
, "", 0);
976 int cg_is_empty(const char *controller
, const char *path
) {
977 _cleanup_fclose_
FILE *f
= NULL
;
983 r
= cg_enumerate_processes(controller
, path
, &f
);
989 r
= cg_read_pid(f
, &pid
);
996 int cg_is_empty_recursive(const char *controller
, const char *path
) {
1001 /* The root cgroup is always populated */
1002 if (controller
&& (isempty(path
) || path_equal(path
, "/")))
1005 unified
= cg_unified();
1010 _cleanup_free_
char *populated
= NULL
, *t
= NULL
;
1012 /* On the unified hierarchy we can check empty state
1013 * via the "cgroup.populated" attribute. */
1015 r
= cg_get_path(controller
, path
, "cgroup.populated", &populated
);
1019 r
= read_one_line_file(populated
, &t
);
1025 return streq(t
, "0");
1027 _cleanup_closedir_
DIR *d
= NULL
;
1030 r
= cg_is_empty(controller
, path
);
1034 r
= cg_enumerate_subgroups(controller
, path
, &d
);
1040 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
1041 _cleanup_free_
char *p
= NULL
;
1043 p
= strjoin(path
, "/", fn
, NULL
);
1048 r
= cg_is_empty_recursive(controller
, p
);
1059 int cg_split_spec(const char *spec
, char **controller
, char **path
) {
1060 char *t
= NULL
, *u
= NULL
;
1066 if (!path_is_safe(spec
))
1074 *path
= path_kill_slashes(t
);
1083 e
= strchr(spec
, ':');
1085 if (!cg_controller_is_valid(spec
))
1102 t
= strndup(spec
, e
-spec
);
1105 if (!cg_controller_is_valid(t
)) {
1119 if (!path_is_safe(u
) ||
1120 !path_is_absolute(u
)) {
1126 path_kill_slashes(u
);
1142 int cg_mangle_path(const char *path
, char **result
) {
1143 _cleanup_free_
char *c
= NULL
, *p
= NULL
;
1150 /* First, check if it already is a filesystem path */
1151 if (path_startswith(path
, "/sys/fs/cgroup")) {
1157 *result
= path_kill_slashes(t
);
1161 /* Otherwise, treat it as cg spec */
1162 r
= cg_split_spec(path
, &c
, &p
);
1166 return cg_get_path(c
?: SYSTEMD_CGROUP_CONTROLLER
, p
?: "/", NULL
, result
);
1169 int cg_get_root_path(char **path
) {
1175 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 1, &p
);
1179 e
= endswith(p
, "/" SPECIAL_INIT_SCOPE
);
1181 e
= endswith(p
, "/" SPECIAL_SYSTEM_SLICE
); /* legacy */
1183 e
= endswith(p
, "/system"); /* even more legacy */
1191 int cg_shift_path(const char *cgroup
, const char *root
, const char **shifted
) {
1192 _cleanup_free_
char *rt
= NULL
;
1200 /* If the root was specified let's use that, otherwise
1201 * let's determine it from PID 1 */
1203 r
= cg_get_root_path(&rt
);
1210 p
= path_startswith(cgroup
, root
);
1211 if (p
&& p
> cgroup
)
1219 int cg_pid_get_path_shifted(pid_t pid
, const char *root
, char **cgroup
) {
1220 _cleanup_free_
char *raw
= NULL
;
1227 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &raw
);
1231 r
= cg_shift_path(raw
, root
, &c
);
1251 int cg_path_decode_unit(const char *cgroup
, char **unit
) {
1258 n
= strcspn(cgroup
, "/");
1262 c
= strndupa(cgroup
, n
);
1265 if (!unit_name_is_valid(c
, UNIT_NAME_PLAIN
|UNIT_NAME_INSTANCE
))
1276 static bool valid_slice_name(const char *p
, size_t n
) {
1281 if (n
< strlen("x.slice"))
1284 if (memcmp(p
+ n
- 6, ".slice", 6) == 0) {
1290 c
= cg_unescape(buf
);
1292 return unit_name_is_valid(c
, UNIT_NAME_PLAIN
);
1298 static const char *skip_slices(const char *p
) {
1301 /* Skips over all slice assignments */
1306 p
+= strspn(p
, "/");
1308 n
= strcspn(p
, "/");
1309 if (!valid_slice_name(p
, n
))
1316 int cg_path_get_unit(const char *path
, char **ret
) {
1324 e
= skip_slices(path
);
1326 r
= cg_path_decode_unit(e
, &unit
);
1330 /* We skipped over the slices, don't accept any now */
1331 if (endswith(unit
, ".slice")) {
1340 int cg_pid_get_unit(pid_t pid
, char **unit
) {
1341 _cleanup_free_
char *cgroup
= NULL
;
1346 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1350 return cg_path_get_unit(cgroup
, unit
);
1354 * Skip session-*.scope, but require it to be there.
1356 static const char *skip_session(const char *p
) {
1362 p
+= strspn(p
, "/");
1364 n
= strcspn(p
, "/");
1365 if (n
< strlen("session-x.scope"))
1368 if (memcmp(p
, "session-", 8) == 0 && memcmp(p
+ n
- 6, ".scope", 6) == 0) {
1369 char buf
[n
- 8 - 6 + 1];
1371 memcpy(buf
, p
+ 8, n
- 8 - 6);
1374 /* Note that session scopes never need unescaping,
1375 * since they cannot conflict with the kernel's own
1376 * names, hence we don't need to call cg_unescape()
1379 if (!session_id_valid(buf
))
1383 p
+= strspn(p
, "/");
1391 * Skip user@*.service, but require it to be there.
1393 static const char *skip_user_manager(const char *p
) {
1399 p
+= strspn(p
, "/");
1401 n
= strcspn(p
, "/");
1402 if (n
< strlen("user@x.service"))
1405 if (memcmp(p
, "user@", 5) == 0 && memcmp(p
+ n
- 8, ".service", 8) == 0) {
1406 char buf
[n
- 5 - 8 + 1];
1408 memcpy(buf
, p
+ 5, n
- 5 - 8);
1411 /* Note that user manager services never need unescaping,
1412 * since they cannot conflict with the kernel's own
1413 * names, hence we don't need to call cg_unescape()
1416 if (parse_uid(buf
, NULL
) < 0)
1420 p
+= strspn(p
, "/");
1428 static const char *skip_user_prefix(const char *path
) {
1433 /* Skip slices, if there are any */
1434 e
= skip_slices(path
);
1436 /* Skip the user manager, if it's in the path now... */
1437 t
= skip_user_manager(e
);
1441 /* Alternatively skip the user session if it is in the path... */
1442 return skip_session(e
);
1445 int cg_path_get_user_unit(const char *path
, char **ret
) {
1451 t
= skip_user_prefix(path
);
1455 /* And from here on it looks pretty much the same as for a
1456 * system unit, hence let's use the same parser from here
1458 return cg_path_get_unit(t
, ret
);
1461 int cg_pid_get_user_unit(pid_t pid
, char **unit
) {
1462 _cleanup_free_
char *cgroup
= NULL
;
1467 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1471 return cg_path_get_user_unit(cgroup
, unit
);
1474 int cg_path_get_machine_name(const char *path
, char **machine
) {
1475 _cleanup_free_
char *u
= NULL
;
1479 r
= cg_path_get_unit(path
, &u
);
1483 sl
= strjoina("/run/systemd/machines/unit:", u
);
1484 return readlink_malloc(sl
, machine
);
1487 int cg_pid_get_machine_name(pid_t pid
, char **machine
) {
1488 _cleanup_free_
char *cgroup
= NULL
;
1493 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1497 return cg_path_get_machine_name(cgroup
, machine
);
1500 int cg_path_get_session(const char *path
, char **session
) {
1501 _cleanup_free_
char *unit
= NULL
;
1507 r
= cg_path_get_unit(path
, &unit
);
1511 start
= startswith(unit
, "session-");
1514 end
= endswith(start
, ".scope");
1519 if (!session_id_valid(start
))
1535 int cg_pid_get_session(pid_t pid
, char **session
) {
1536 _cleanup_free_
char *cgroup
= NULL
;
1539 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1543 return cg_path_get_session(cgroup
, session
);
1546 int cg_path_get_owner_uid(const char *path
, uid_t
*uid
) {
1547 _cleanup_free_
char *slice
= NULL
;
1553 r
= cg_path_get_slice(path
, &slice
);
1557 start
= startswith(slice
, "user-");
1560 end
= endswith(start
, ".slice");
1565 if (parse_uid(start
, uid
) < 0)
1571 int cg_pid_get_owner_uid(pid_t pid
, uid_t
*uid
) {
1572 _cleanup_free_
char *cgroup
= NULL
;
1575 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1579 return cg_path_get_owner_uid(cgroup
, uid
);
1582 int cg_path_get_slice(const char *p
, char **slice
) {
1583 const char *e
= NULL
;
1588 /* Finds the right-most slice unit from the beginning, but
1589 * stops before we come to the first non-slice unit. */
1594 p
+= strspn(p
, "/");
1596 n
= strcspn(p
, "/");
1597 if (!valid_slice_name(p
, n
)) {
1602 s
= strdup("-.slice");
1610 return cg_path_decode_unit(e
, slice
);
1618 int cg_pid_get_slice(pid_t pid
, char **slice
) {
1619 _cleanup_free_
char *cgroup
= NULL
;
1624 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1628 return cg_path_get_slice(cgroup
, slice
);
1631 int cg_path_get_user_slice(const char *p
, char **slice
) {
1636 t
= skip_user_prefix(p
);
1640 /* And now it looks pretty much the same as for a system
1641 * slice, so let's just use the same parser from here on. */
1642 return cg_path_get_slice(t
, slice
);
1645 int cg_pid_get_user_slice(pid_t pid
, char **slice
) {
1646 _cleanup_free_
char *cgroup
= NULL
;
1651 r
= cg_pid_get_path_shifted(pid
, NULL
, &cgroup
);
1655 return cg_path_get_user_slice(cgroup
, slice
);
1658 char *cg_escape(const char *p
) {
1659 bool need_prefix
= false;
1661 /* This implements very minimal escaping for names to be used
1662 * as file names in the cgroup tree: any name which might
1663 * conflict with a kernel name or is prefixed with '_' is
1664 * prefixed with a '_'. That way, when reading cgroup names it
1665 * is sufficient to remove a single prefixing underscore if
1668 /* The return value of this function (unlike cg_unescape())
1674 streq(p
, "notify_on_release") ||
1675 streq(p
, "release_agent") ||
1676 streq(p
, "tasks") ||
1677 startswith(p
, "cgroup."))
1682 dot
= strrchr(p
, '.');
1687 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1690 n
= cgroup_controller_to_string(c
);
1695 if (memcmp(p
, n
, l
) != 0)
1705 return strappend("_", p
);
1710 char *cg_unescape(const char *p
) {
1713 /* The return value of this function (unlike cg_escape())
1714 * doesn't need free()! */
1722 #define CONTROLLER_VALID \
1726 bool cg_controller_is_valid(const char *p
) {
1732 s
= startswith(p
, "name=");
1736 if (*p
== 0 || *p
== '_')
1739 for (t
= p
; *t
; t
++)
1740 if (!strchr(CONTROLLER_VALID
, *t
))
1743 if (t
- p
> FILENAME_MAX
)
1749 int cg_slice_to_path(const char *unit
, char **ret
) {
1750 _cleanup_free_
char *p
= NULL
, *s
= NULL
, *e
= NULL
;
1757 if (streq(unit
, "-.slice")) {
1767 if (!unit_name_is_valid(unit
, UNIT_NAME_PLAIN
))
1770 if (!endswith(unit
, ".slice"))
1773 r
= unit_name_to_prefix(unit
, &p
);
1777 dash
= strchr(p
, '-');
1779 /* Don't allow initial dashes */
1784 _cleanup_free_
char *escaped
= NULL
;
1785 char n
[dash
- p
+ sizeof(".slice")];
1787 /* Don't allow trailing or double dashes */
1788 if (dash
[1] == 0 || dash
[1] == '-')
1791 strcpy(stpncpy(n
, p
, dash
- p
), ".slice");
1792 if (!unit_name_is_valid(n
, UNIT_NAME_PLAIN
))
1795 escaped
= cg_escape(n
);
1799 if (!strextend(&s
, escaped
, "/", NULL
))
1802 dash
= strchr(dash
+1, '-');
1805 e
= cg_escape(unit
);
1809 if (!strextend(&s
, e
, NULL
))
1818 int cg_set_attribute(const char *controller
, const char *path
, const char *attribute
, const char *value
) {
1819 _cleanup_free_
char *p
= NULL
;
1822 r
= cg_get_path(controller
, path
, attribute
, &p
);
1826 return write_string_file(p
, value
, 0);
1829 int cg_get_attribute(const char *controller
, const char *path
, const char *attribute
, char **ret
) {
1830 _cleanup_free_
char *p
= NULL
;
1833 r
= cg_get_path(controller
, path
, attribute
, &p
);
1837 return read_one_line_file(p
, ret
);
1840 int cg_create_everywhere(CGroupMask supported
, CGroupMask mask
, const char *path
) {
1844 /* This one will create a cgroup in our private tree, but also
1845 * duplicate it in the trees specified in mask, and remove it
1848 /* First create the cgroup in our own hierarchy. */
1849 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER
, path
);
1853 /* If we are in the unified hierarchy, we are done now */
1854 unified
= cg_unified();
1860 /* Otherwise, do the same in the other hierarchies */
1861 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1862 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1865 n
= cgroup_controller_to_string(c
);
1868 (void) cg_create(n
, path
);
1869 else if (supported
& bit
)
1870 (void) cg_trim(n
, path
, true);
1876 int cg_attach_everywhere(CGroupMask supported
, const char *path
, pid_t pid
, cg_migrate_callback_t path_callback
, void *userdata
) {
1880 r
= cg_attach(SYSTEMD_CGROUP_CONTROLLER
, path
, pid
);
1884 unified
= cg_unified();
1890 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1891 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1892 const char *p
= NULL
;
1894 if (!(supported
& bit
))
1898 p
= path_callback(bit
, userdata
);
1903 (void) cg_attach_fallback(cgroup_controller_to_string(c
), p
, pid
);
1909 int cg_attach_many_everywhere(CGroupMask supported
, const char *path
, Set
* pids
, cg_migrate_callback_t path_callback
, void *userdata
) {
1914 SET_FOREACH(pidp
, pids
, i
) {
1915 pid_t pid
= PTR_TO_PID(pidp
);
1918 q
= cg_attach_everywhere(supported
, path
, pid
, path_callback
, userdata
);
1919 if (q
< 0 && r
>= 0)
1926 int cg_migrate_everywhere(CGroupMask supported
, const char *from
, const char *to
, cg_migrate_callback_t to_callback
, void *userdata
) {
1930 if (!path_equal(from
, to
)) {
1931 r
= cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER
, from
, SYSTEMD_CGROUP_CONTROLLER
, to
, false, true);
1936 unified
= cg_unified();
1942 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1943 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1944 const char *p
= NULL
;
1946 if (!(supported
& bit
))
1950 p
= to_callback(bit
, userdata
);
1955 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER
, to
, cgroup_controller_to_string(c
), p
, false, false);
1961 int cg_trim_everywhere(CGroupMask supported
, const char *path
, bool delete_root
) {
1965 r
= cg_trim(SYSTEMD_CGROUP_CONTROLLER
, path
, delete_root
);
1969 unified
= cg_unified();
1975 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
1976 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
1978 if (!(supported
& bit
))
1981 (void) cg_trim(cgroup_controller_to_string(c
), path
, delete_root
);
1987 int cg_mask_supported(CGroupMask
*ret
) {
1988 CGroupMask mask
= 0;
1991 /* Determines the mask of supported cgroup controllers. Only
1992 * includes controllers we can make sense of and that are
1993 * actually accessible. */
1995 unified
= cg_unified();
1999 _cleanup_free_
char *root
= NULL
, *controllers
= NULL
, *path
= NULL
;
2002 /* In the unified hierarchy we can read the supported
2003 * and accessible controllers from the top-level
2004 * cgroup attribute */
2006 r
= cg_get_root_path(&root
);
2010 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, root
, "cgroup.controllers", &path
);
2014 r
= read_one_line_file(path
, &controllers
);
2020 _cleanup_free_
char *n
= NULL
;
2023 r
= extract_first_word(&c
, &n
, NULL
, 0);
2029 v
= cgroup_controller_from_string(n
);
2033 mask
|= CGROUP_CONTROLLER_TO_MASK(v
);
2036 /* Currently, we only support the memory and pids
2037 * controller in the unified hierarchy, mask
2038 * everything else off. */
2039 mask
&= CGROUP_MASK_MEMORY
| CGROUP_MASK_PIDS
;
2044 /* In the legacy hierarchy, we check which
2045 * hierarchies are mounted. */
2047 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2050 n
= cgroup_controller_to_string(c
);
2051 if (controller_is_accessible(n
) >= 0)
2052 mask
|= CGROUP_CONTROLLER_TO_MASK(c
);
2060 int cg_kernel_controllers(Set
*controllers
) {
2061 _cleanup_fclose_
FILE *f
= NULL
;
2065 assert(controllers
);
2067 /* Determines the full list of kernel-known controllers. Might
2068 * include controllers we don't actually support, arbitrary
2069 * named hierarchies and controllers that aren't currently
2070 * accessible (because not mounted). */
2072 f
= fopen("/proc/cgroups", "re");
2074 if (errno
== ENOENT
)
2079 /* Ignore the header line */
2080 (void) fgets(buf
, sizeof(buf
), f
);
2087 if (fscanf(f
, "%ms %*i %*i %i", &controller
, &enabled
) != 2) {
2092 if (ferror(f
) && errno
> 0)
2103 if (!cg_controller_is_valid(controller
)) {
2108 r
= set_consume(controllers
, controller
);
2116 static thread_local
int unified_cache
= -1;
2118 int cg_unified(void) {
2121 /* Checks if we support the unified hierarchy. Returns an
2122 * error when the cgroup hierarchies aren't mounted yet or we
2123 * have any other trouble determining if the unified hierarchy
2126 if (unified_cache
>= 0)
2127 return unified_cache
;
2129 if (statfs("/sys/fs/cgroup/", &fs
) < 0)
2132 if (F_TYPE_EQUAL(fs
.f_type
, CGROUP_SUPER_MAGIC
))
2133 unified_cache
= true;
2134 else if (F_TYPE_EQUAL(fs
.f_type
, TMPFS_MAGIC
))
2135 unified_cache
= false;
2139 return unified_cache
;
2142 void cg_unified_flush(void) {
2146 int cg_enable_everywhere(CGroupMask supported
, CGroupMask mask
, const char *p
) {
2147 _cleanup_free_
char *fs
= NULL
;
2156 unified
= cg_unified();
2159 if (!unified
) /* on the legacy hierarchy there's no joining of controllers defined */
2162 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, p
, "cgroup.subtree_control", &fs
);
2166 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
2167 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
2170 if (!(supported
& bit
))
2173 n
= cgroup_controller_to_string(c
);
2175 char s
[1 + strlen(n
) + 1];
2177 s
[0] = mask
& bit
? '+' : '-';
2180 r
= write_string_file(fs
, s
, 0);
2182 log_debug_errno(r
, "Failed to enable controller %s for %s (%s): %m", n
, p
, fs
);
2189 bool cg_is_unified_wanted(void) {
2190 static thread_local
int wanted
= -1;
2193 /* If the hierarchy is already mounted, then follow whatever
2194 * was chosen for it. */
2195 unified
= cg_unified();
2199 /* Otherwise, let's see what the kernel command line has to
2200 * say. Since checking that is expensive, let's cache the
2205 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL
);
2207 return (wanted
= true);
2209 _cleanup_free_
char *value
= NULL
;
2211 r
= get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value
);
2215 return (wanted
= false);
2217 return (wanted
= parse_boolean(value
) > 0);
2221 bool cg_is_legacy_wanted(void) {
2222 return !cg_is_unified_wanted();
2225 int cg_cpu_shares_parse(const char *s
, uint64_t *ret
) {
2230 *ret
= CGROUP_CPU_SHARES_INVALID
;
2234 r
= safe_atou64(s
, &u
);
2238 if (u
< CGROUP_CPU_SHARES_MIN
|| u
> CGROUP_CPU_SHARES_MAX
)
2245 int cg_blkio_weight_parse(const char *s
, uint64_t *ret
) {
2250 *ret
= CGROUP_BLKIO_WEIGHT_INVALID
;
2254 r
= safe_atou64(s
, &u
);
2258 if (u
< CGROUP_BLKIO_WEIGHT_MIN
|| u
> CGROUP_BLKIO_WEIGHT_MAX
)
2265 static const char *cgroup_controller_table
[_CGROUP_CONTROLLER_MAX
] = {
2266 [CGROUP_CONTROLLER_CPU
] = "cpu",
2267 [CGROUP_CONTROLLER_CPUACCT
] = "cpuacct",
2268 [CGROUP_CONTROLLER_BLKIO
] = "blkio",
2269 [CGROUP_CONTROLLER_MEMORY
] = "memory",
2270 [CGROUP_CONTROLLER_DEVICES
] = "devices",
2271 [CGROUP_CONTROLLER_PIDS
] = "pids",
2274 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller
, CGroupController
);