]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/cgroup-util.c
cgroup-util: add new cg_remove_xattr() for removing xattr from cgroup
[thirdparty/systemd.git] / src / basic / cgroup-util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
8c6db833
LP
2
3#include <errno.h>
84ac7bea 4#include <ftw.h>
11c3a366 5#include <limits.h>
8c6db833 6#include <signal.h>
11c3a366 7#include <stddef.h>
8c6db833 8#include <stdlib.h>
672c48cc 9#include <sys/types.h>
f98c2585 10#include <sys/utsname.h>
4b58153d 11#include <sys/xattr.h>
84ac7bea 12#include <unistd.h>
8c6db833 13
b5efdb8a 14#include "alloc-util.h"
3ffd4af2 15#include "cgroup-util.h"
93cc7779 16#include "def.h"
a0956174 17#include "dirent-util.h"
84ac7bea 18#include "extract-word.h"
3ffd4af2 19#include "fd-util.h"
84ac7bea 20#include "fileio.h"
f97b34a6 21#include "format-util.h"
f4f15635 22#include "fs-util.h"
93cc7779 23#include "log.h"
84ac7bea
LP
24#include "login-util.h"
25#include "macro.h"
f5947a5e 26#include "missing_magic.h"
84ac7bea 27#include "mkdir.h"
6bedfcbb 28#include "parse-util.h"
9eb977db 29#include "path-util.h"
84ac7bea
LP
30#include "process-util.h"
31#include "set.h"
9444b1f2 32#include "special.h"
872a590e 33#include "stat-util.h"
d054f0a4 34#include "stdio-util.h"
8b43440b 35#include "string-table.h"
07630cea 36#include "string-util.h"
aae7e17f 37#include "strv.h"
84ac7bea 38#include "unit-name.h"
b1d4f8e1 39#include "user-util.h"
8c6db833 40
e48fcfef 41static int cg_enumerate_items(const char *controller, const char *path, FILE **_f, const char *item) {
7027ff61 42 _cleanup_free_ char *fs = NULL;
c6c18be3 43 FILE *f;
7027ff61 44 int r;
c6c18be3 45
c6c18be3
LP
46 assert(_f);
47
e48fcfef 48 r = cg_get_path(controller, path, item, &fs);
c3175a7f 49 if (r < 0)
c6c18be3
LP
50 return r;
51
52 f = fopen(fs, "re");
c6c18be3
LP
53 if (!f)
54 return -errno;
55
56 *_f = f;
57 return 0;
58}
59
e48fcfef
TM
/* Opens the "cgroup.procs" attribute of the given cgroup; see cg_enumerate_items() for the return
 * value convention. */
int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
        const char *attribute = "cgroup.procs";

        return cg_enumerate_items(controller, path, _f, attribute);
}
63
c6c18be3
LP
/* Reads the next PID from a stream previously opened on a process list attribute.
 *
 * Returns 1 and stores the PID in *_pid if one was read, 0 on end of file, and a negative errno-style
 * value on failure. -EIO is returned for entries that are zero or that do not fit into pid_t.
 *
 * Note that the cgroup.procs might contain duplicates! See cgroups.txt for details. */
int cg_read_pid(FILE *f, pid_t *_pid) {
        unsigned long ul;
        pid_t pid;

        assert(f);
        assert(_pid);

        /* Reset errno so that errno_or_else() below can tell whether fscanf() reported a real error */
        errno = 0;
        if (fscanf(f, "%lu", &ul) != 1) {

                if (feof(f))
                        return 0;

                return errno_or_else(EIO);
        }

        /* Refuse anything that is not a positive value that survives the round trip through pid_t, so
         * that an oversized number is never silently truncated into some unrelated PID. */
        pid = (pid_t) ul;
        if (pid <= 0 || (unsigned long) pid != ul)
                return -EIO;

        *_pid = pid;
        return 1;
}
88
8b238b13
LP
89int cg_read_event(
90 const char *controller,
91 const char *path,
92 const char *event,
31a9be23 93 char **ret) {
8b238b13 94
ab2c3861 95 _cleanup_free_ char *events = NULL, *content = NULL;
ab2c3861
TH
96 int r;
97
98 r = cg_get_path(controller, path, "cgroup.events", &events);
99 if (r < 0)
100 return r;
101
102 r = read_full_file(events, &content, NULL);
103 if (r < 0)
104 return r;
105
31a9be23
YW
106 for (const char *p = content;;) {
107 _cleanup_free_ char *line = NULL, *key = NULL, *val = NULL;
108 const char *q;
109
110 r = extract_first_word(&p, &line, "\n", 0);
111 if (r < 0)
112 return r;
113 if (r == 0)
114 return -ENOENT;
115
116 q = line;
117 r = extract_first_word(&q, &key, " ", 0);
118 if (r < 0)
119 return r;
120 if (r == 0)
ab2c3861
TH
121 return -EINVAL;
122
31a9be23 123 if (!streq(key, event))
ab2c3861
TH
124 continue;
125
31a9be23
YW
126 val = strdup(q);
127 if (!val)
128 return -ENOMEM;
129
130 *ret = TAKE_PTR(val);
ab2c3861
TH
131 return 0;
132 }
ab2c3861
TH
133}
134
3228995c
CB
135bool cg_ns_supported(void) {
136 static thread_local int enabled = -1;
137
138 if (enabled >= 0)
139 return enabled;
140
0887fa71
LP
141 if (access("/proc/self/ns/cgroup", F_OK) < 0) {
142 if (errno != ENOENT)
143 log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
144 enabled = false;
145 } else
146 enabled = true;
3228995c
CB
147
148 return enabled;
149}
150
35d2e7ec 151int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
7027ff61 152 _cleanup_free_ char *fs = NULL;
35d2e7ec
LP
153 int r;
154 DIR *d;
155
35d2e7ec
LP
156 assert(_d);
157
158 /* This is not recursive! */
159
c3175a7f
LP
160 r = cg_get_path(controller, path, NULL, &fs);
161 if (r < 0)
35d2e7ec
LP
162 return r;
163
164 d = opendir(fs);
35d2e7ec
LP
165 if (!d)
166 return -errno;
167
168 *_d = d;
169 return 0;
170}
171
172int cg_read_subgroup(DIR *d, char **fn) {
173 struct dirent *de;
174
175 assert(d);
7027ff61 176 assert(fn);
35d2e7ec 177
f01327ad 178 FOREACH_DIRENT_ALL(de, d, return -errno) {
35d2e7ec
LP
179 char *b;
180
181 if (de->d_type != DT_DIR)
182 continue;
183
49bfc877 184 if (dot_or_dot_dot(de->d_name))
35d2e7ec
LP
185 continue;
186
7027ff61
LP
187 b = strdup(de->d_name);
188 if (!b)
35d2e7ec
LP
189 return -ENOMEM;
190
191 *fn = b;
192 return 1;
193 }
194
35d2e7ec
LP
195 return 0;
196}
197
4ad49000 198int cg_rmdir(const char *controller, const char *path) {
7027ff61 199 _cleanup_free_ char *p = NULL;
35d2e7ec
LP
200 int r;
201
ad293f5a
LP
202 r = cg_get_path(controller, path, NULL, &p);
203 if (r < 0)
35d2e7ec
LP
204 return r;
205
206 r = rmdir(p);
7027ff61
LP
207 if (r < 0 && errno != ENOENT)
208 return -errno;
35d2e7ec 209
b4cccbc1 210 r = cg_hybrid_unified();
f20db199 211 if (r <= 0)
b4cccbc1 212 return r;
b4cccbc1
LP
213
214 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
215 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
216 if (r < 0)
217 log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
218 }
219
7027ff61 220 return 0;
35d2e7ec
LP
221}
222
e48fcfef 223static int cg_kill_items(
1d98fef1
LP
224 const char *controller,
225 const char *path,
226 int sig,
227 CGroupFlags flags,
228 Set *s,
229 cg_kill_log_func_t log_kill,
e48fcfef
TM
230 void *userdata,
231 const char *item) {
1d98fef1 232
7027ff61 233 _cleanup_set_free_ Set *allocated_set = NULL;
35d2e7ec 234 bool done = false;
c53d2d54 235 int r, ret = 0, ret_log_kill = 0;
35d2e7ec 236 pid_t my_pid;
8c6db833 237
8c6db833
LP
238 assert(sig >= 0);
239
0d5b4810
LP
240 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
241 * SIGCONT on SIGKILL. */
242 if (IN_SET(sig, SIGCONT, SIGKILL))
243 flags &= ~CGROUP_SIGCONT;
244
8c6db833
LP
245 /* This goes through the tasks list and kills them all. This
246 * is repeated until no further processes are added to the
247 * tasks list, to properly handle forking processes */
248
7027ff61 249 if (!s) {
d5099efc 250 s = allocated_set = set_new(NULL);
7027ff61 251 if (!s)
ca949c9d 252 return -ENOMEM;
7027ff61 253 }
8c6db833 254
df0ff127 255 my_pid = getpid_cached();
8c6db833
LP
256
257 do {
7027ff61 258 _cleanup_fclose_ FILE *f = NULL;
0b172489 259 pid_t pid = 0;
8c6db833
LP
260 done = true;
261
e48fcfef 262 r = cg_enumerate_items(controller, path, &f, item);
7027ff61 263 if (r < 0) {
4c633005 264 if (ret >= 0 && r != -ENOENT)
7027ff61 265 return r;
35d2e7ec 266
7027ff61 267 return ret;
35d2e7ec 268 }
c6c18be3
LP
269
270 while ((r = cg_read_pid(f, &pid)) > 0) {
8c6db833 271
1d98fef1 272 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
c6c18be3 273 continue;
8c6db833 274
fea72cc0 275 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
c6c18be3 276 continue;
8c6db833 277
1d98fef1 278 if (log_kill)
c53d2d54 279 ret_log_kill = log_kill(pid, sig, userdata);
1d98fef1 280
8c6db833
LP
281 /* If we haven't killed this process yet, kill
282 * it */
4c633005
LP
283 if (kill(pid, sig) < 0) {
284 if (ret >= 0 && errno != ESRCH)
8c6db833 285 ret = -errno;
6e8314c4 286 } else {
1d98fef1 287 if (flags & CGROUP_SIGCONT)
e155a0aa 288 (void) kill(pid, SIGCONT);
430c18ed 289
c53d2d54
DB
290 if (ret == 0) {
291 if (log_kill)
292 ret = ret_log_kill;
293 else
294 ret = 1;
295 }
430c18ed 296 }
8c6db833 297
8c6db833
LP
298 done = false;
299
fea72cc0 300 r = set_put(s, PID_TO_PTR(pid));
7027ff61 301 if (r < 0) {
35d2e7ec 302 if (ret >= 0)
7027ff61 303 return r;
35d2e7ec 304
7027ff61 305 return ret;
35d2e7ec
LP
306 }
307 }
308
309 if (r < 0) {
310 if (ret >= 0)
7027ff61 311 return r;
35d2e7ec 312
7027ff61 313 return ret;
8c6db833
LP
314 }
315
8c6db833
LP
316 /* To avoid racing against processes which fork
317 * quicker than we can kill them we repeat this until
318 * no new pids need to be killed. */
319
35d2e7ec 320 } while (!done);
8c6db833 321
35d2e7ec 322 return ret;
8c6db833
LP
323}
324
e48fcfef
TM
325int cg_kill(
326 const char *controller,
327 const char *path,
328 int sig,
329 CGroupFlags flags,
330 Set *s,
331 cg_kill_log_func_t log_kill,
332 void *userdata) {
333 int r;
334
335 r = cg_kill_items(controller, path, sig, flags, s, log_kill, userdata, "cgroup.procs");
336 if (r < 0 || sig != SIGKILL)
337 return r;
338
339 /* Only in case of killing with SIGKILL and when using cgroupsv2, kill remaining threads manually as
cda5ccdb
TM
340 a workaround for kernel bug. It was fixed in 5.2-rc5 (c03cd7738a83), backported to 4.19.66
341 (4340d175b898) and 4.14.138 (feb6b123b7dd). */
e48fcfef 342 r = cg_unified_controller(controller);
38288f0b 343 if (r <= 0)
e48fcfef 344 return r;
e48fcfef
TM
345
346 return cg_kill_items(controller, path, sig, flags, s, log_kill, userdata, "cgroup.threads");
347}
348
1d98fef1
LP
349int cg_kill_recursive(
350 const char *controller,
351 const char *path,
352 int sig,
353 CGroupFlags flags,
354 Set *s,
355 cg_kill_log_func_t log_kill,
356 void *userdata) {
357
7027ff61
LP
358 _cleanup_set_free_ Set *allocated_set = NULL;
359 _cleanup_closedir_ DIR *d = NULL;
e155a0aa 360 int r, ret;
35d2e7ec 361 char *fn;
8c6db833
LP
362
363 assert(path);
8c6db833
LP
364 assert(sig >= 0);
365
7027ff61 366 if (!s) {
d5099efc 367 s = allocated_set = set_new(NULL);
7027ff61 368 if (!s)
ca949c9d 369 return -ENOMEM;
7027ff61 370 }
ca949c9d 371
1d98fef1 372 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
8c6db833 373
7027ff61
LP
374 r = cg_enumerate_subgroups(controller, path, &d);
375 if (r < 0) {
4c633005 376 if (ret >= 0 && r != -ENOENT)
7027ff61 377 return r;
8c6db833 378
7027ff61 379 return ret;
35d2e7ec 380 }
8c6db833 381
35d2e7ec 382 while ((r = cg_read_subgroup(d, &fn)) > 0) {
7027ff61 383 _cleanup_free_ char *p = NULL;
8c6db833 384
95b21cff 385 p = path_join(empty_to_root(path), fn);
35d2e7ec 386 free(fn);
7027ff61
LP
387 if (!p)
388 return -ENOMEM;
8c6db833 389
1d98fef1 390 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
e155a0aa 391 if (r != 0 && ret >= 0)
35d2e7ec 392 ret = r;
8c6db833 393 }
7027ff61 394 if (ret >= 0 && r < 0)
35d2e7ec
LP
395 ret = r;
396
1d98fef1 397 if (flags & CGROUP_REMOVE) {
4ad49000 398 r = cg_rmdir(controller, path);
4c701096 399 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
7027ff61
LP
400 return r;
401 }
ca949c9d 402
8c6db833
LP
403 return ret;
404}
405
efdb0237
LP
406static const char *controller_to_dirname(const char *controller) {
407 const char *e;
3474ae3c 408
7027ff61
LP
409 assert(controller);
410
efdb0237
LP
411 /* Converts a controller name to the directory name below
412 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
413 * just cuts off the name= prefixed used for named
414 * hierarchies, if it is specified. */
415
2977724b 416 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
b4cccbc1 417 if (cg_hybrid_unified() > 0)
2977724b
TH
418 controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
419 else
420 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
421 }
b6629c4b 422
efdb0237
LP
423 e = startswith(controller, "name=");
424 if (e)
425 return e;
426
427 return controller;
3474ae3c
LP
428}
429
569b19d8
LP
/* Builds "/sys/fs/cgroup/<controller dir>[/<path>][/<suffix>]", leaving out empty components, and
 * stores the newly allocated result in *fs. Returns 0 on success, -ENOMEM on allocation failure. */
static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
        const char *dir;
        char *joined;

        assert(fs);
        assert(controller);

        dir = controller_to_dirname(controller);

        if (isempty(path)) {
                if (isempty(suffix))
                        joined = path_join("/sys/fs/cgroup", dir);
                else
                        joined = path_join("/sys/fs/cgroup", dir, suffix);
        } else {
                if (isempty(suffix))
                        joined = path_join("/sys/fs/cgroup", dir, path);
                else
                        joined = path_join("/sys/fs/cgroup", dir, path, suffix);
        }
        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
453
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]", leaving out empty components, and stores the newly
 * allocated result in *fs. Returns 0 on success, -ENOMEM on allocation failure. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        char *joined;

        assert(fs);

        if (isempty(path)) {
                if (isempty(suffix))
                        joined = strdup("/sys/fs/cgroup");
                else
                        joined = path_join("/sys/fs/cgroup", suffix);
        } else {
                if (isempty(suffix))
                        joined = path_join("/sys/fs/cgroup", path);
                else
                        joined = path_join("/sys/fs/cgroup", path, suffix);
        }
        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
473
/* Generates the file system path for a cgroup (and optionally one of its attributes, via 'suffix') and
 * stores it as a newly allocated, simplified string in *fs.
 *
 * If no controller is specified, the path *below* the controllers is returned, without any prefix; in
 * that case at least one of 'path' and 'suffix' must be set. Returns 0 on success, -EINVAL for invalid
 * arguments, -ENOMEM on allocation failure, or the error of cg_all_unified(). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int r;

        assert(fs);

        if (!controller) {
                char *joined;

                if (path && suffix)
                        joined = path_join(path, suffix);
                else if (path)
                        joined = strdup(path);
                else if (suffix)
                        joined = strdup(suffix);
                else
                        return -EINVAL;
                if (!joined)
                        return -ENOMEM;

                *fs = path_simplify(joined, false);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified();
        if (r < 0)
                return r;

        /* The unified hierarchy has no per-controller directory level */
        if (r == 0)
                r = join_path_legacy(controller, path, suffix, fs);
        else
                r = join_path_unified(path, suffix, fs);
        if (r < 0)
                return r;

        path_simplify(*fs, false);
        return 0;
}
dbd821ac 517
efdb0237 518static int controller_is_accessible(const char *controller) {
b4cccbc1 519 int r;
37099707 520
efdb0237 521 assert(controller);
37099707 522
efdb0237
LP
523 /* Checks whether a specific controller is accessible,
524 * i.e. its hierarchy mounted. In the unified hierarchy all
525 * controllers are considered accessible, except for the named
526 * hierarchies */
b12afc8c 527
efdb0237
LP
528 if (!cg_controller_is_valid(controller))
529 return -EINVAL;
530
b4cccbc1
LP
531 r = cg_all_unified();
532 if (r < 0)
533 return r;
534 if (r > 0) {
efdb0237
LP
535 /* We don't support named hierarchies if we are using
536 * the unified hierarchy. */
537
538 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
539 return 0;
540
541 if (startswith(controller, "name="))
542 return -EOPNOTSUPP;
543
544 } else {
545 const char *cc, *dn;
546
547 dn = controller_to_dirname(controller);
548 cc = strjoina("/sys/fs/cgroup/", dn);
549
550 if (laccess(cc, F_OK) < 0)
551 return -errno;
552 }
37099707
LP
553
554 return 0;
555}
556
/* Same as cg_get_path(), but first verifies via controller_is_accessible() that the controller can
 * actually be used, and fails with that check's error if it cannot. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
570
4b58153d
LP
571int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
572 _cleanup_free_ char *fs = NULL;
573 int r;
574
575 assert(path);
576 assert(name);
577 assert(value || size <= 0);
578
579 r = cg_get_path(controller, path, NULL, &fs);
580 if (r < 0)
581 return r;
582
583 if (setxattr(fs, name, value, size, flags) < 0)
584 return -errno;
585
586 return 0;
587}
588
589int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
590 _cleanup_free_ char *fs = NULL;
591 ssize_t n;
592 int r;
593
594 assert(path);
595 assert(name);
596
597 r = cg_get_path(controller, path, NULL, &fs);
598 if (r < 0)
599 return r;
600
601 n = getxattr(fs, name, value, size);
602 if (n < 0)
603 return -errno;
604
605 return (int) n;
606}
607
bf25f165
LP
608int cg_remove_xattr(const char *controller, const char *path, const char *name) {
609 _cleanup_free_ char *fs = NULL;
610 int r;
611
612 assert(path);
613 assert(name);
614
615 r = cg_get_path(controller, path, NULL, &fs);
616 if (r < 0)
617 return r;
618
619 if (removexattr(fs, name) < 0)
620 return -errno;
621
622 return 0;
623}
624
7027ff61 625int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
7027ff61 626 _cleanup_fclose_ FILE *f = NULL;
b6629c4b 627 const char *fs, *controller_str;
d2b39cb6 628 int unified, r;
efdb0237 629 size_t cs = 0;
8c6db833 630
8c6db833 631 assert(path);
c6c18be3 632 assert(pid >= 0);
8c6db833 633
5da38d07
TH
634 if (controller) {
635 if (!cg_controller_is_valid(controller))
636 return -EINVAL;
637 } else
638 controller = SYSTEMD_CGROUP_CONTROLLER;
639
c22800e4 640 unified = cg_unified_controller(controller);
b4cccbc1
LP
641 if (unified < 0)
642 return unified;
643 if (unified == 0) {
b6629c4b
TH
644 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
645 controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
646 else
647 controller_str = controller;
648
649 cs = strlen(controller_str);
650 }
7027ff61 651
b68fa010 652 fs = procfs_file_alloca(pid, "cgroup");
fdeea3f4
ZJS
653 r = fopen_unlocked(fs, "re", &f);
654 if (r == -ENOENT)
655 return -ESRCH;
656 if (r < 0)
657 return r;
35bbbf85 658
d2b39cb6
LP
659 for (;;) {
660 _cleanup_free_ char *line = NULL;
efdb0237 661 char *e, *p;
c6c18be3 662
d2b39cb6
LP
663 r = read_line(f, LONG_LINE_MAX, &line);
664 if (r < 0)
665 return r;
666 if (r == 0)
667 break;
c6c18be3 668
efdb0237
LP
669 if (unified) {
670 e = startswith(line, "0:");
671 if (!e)
672 continue;
c6c18be3 673
efdb0237
LP
674 e = strchr(e, ':');
675 if (!e)
676 continue;
677 } else {
678 char *l;
679 size_t k;
680 const char *word, *state;
681 bool found = false;
682
683 l = strchr(line, ':');
684 if (!l)
685 continue;
8af8afd6 686
efdb0237
LP
687 l++;
688 e = strchr(l, ':');
689 if (!e)
690 continue;
8af8afd6 691
efdb0237 692 *e = 0;
00d4b1e6 693 FOREACH_WORD_SEPARATOR(word, k, l, ",", state)
b6629c4b 694 if (k == cs && memcmp(word, controller_str, cs) == 0) {
efdb0237
LP
695 found = true;
696 break;
697 }
efdb0237
LP
698 if (!found)
699 continue;
8af8afd6
LP
700 }
701
8af8afd6 702 p = strdup(e + 1);
7027ff61
LP
703 if (!p)
704 return -ENOMEM;
c6c18be3 705
5e20b0a4
LP
706 /* Truncate suffix indicating the process is a zombie */
707 e = endswith(p, " (deleted)");
708 if (e)
709 *e = 0;
710
c6c18be3 711 *path = p;
7027ff61 712 return 0;
c6c18be3
LP
713 }
714
1c80e425 715 return -ENODATA;
8c6db833
LP
716}
717
718int cg_install_release_agent(const char *controller, const char *agent) {
7027ff61 719 _cleanup_free_ char *fs = NULL, *contents = NULL;
efdb0237 720 const char *sc;
415fc41c 721 int r;
8c6db833 722
8c6db833
LP
723 assert(agent);
724
c22800e4 725 r = cg_unified_controller(controller);
b4cccbc1
LP
726 if (r < 0)
727 return r;
728 if (r > 0) /* doesn't apply to unified hierarchy */
efdb0237
LP
729 return -EOPNOTSUPP;
730
7027ff61
LP
731 r = cg_get_path(controller, NULL, "release_agent", &fs);
732 if (r < 0)
c6c18be3 733 return r;
8c6db833 734
7027ff61
LP
735 r = read_one_line_file(fs, &contents);
736 if (r < 0)
737 return r;
8c6db833
LP
738
739 sc = strstrip(contents);
e155a0aa 740 if (isempty(sc)) {
604028de 741 r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
574d5f2d 742 if (r < 0)
7027ff61 743 return r;
b8725df8 744 } else if (!path_equal(sc, agent))
7027ff61 745 return -EEXIST;
8c6db833 746
0da16248 747 fs = mfree(fs);
7027ff61
LP
748 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
749 if (r < 0)
750 return r;
8c6db833 751
0da16248 752 contents = mfree(contents);
7027ff61
LP
753 r = read_one_line_file(fs, &contents);
754 if (r < 0)
755 return r;
8c6db833
LP
756
757 sc = strstrip(contents);
8c6db833 758 if (streq(sc, "0")) {
604028de 759 r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
7027ff61
LP
760 if (r < 0)
761 return r;
c6c18be3 762
7027ff61
LP
763 return 1;
764 }
8c6db833 765
7027ff61
LP
766 if (!streq(sc, "1"))
767 return -EIO;
8c6db833 768
7027ff61 769 return 0;
8c6db833
LP
770}
771
ad929bcc
KS
772int cg_uninstall_release_agent(const char *controller) {
773 _cleanup_free_ char *fs = NULL;
415fc41c 774 int r;
efdb0237 775
c22800e4 776 r = cg_unified_controller(controller);
b4cccbc1
LP
777 if (r < 0)
778 return r;
779 if (r > 0) /* Doesn't apply to unified hierarchy */
efdb0237 780 return -EOPNOTSUPP;
ad929bcc 781
ac9ef333
LP
782 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
783 if (r < 0)
784 return r;
785
604028de 786 r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
ac9ef333
LP
787 if (r < 0)
788 return r;
789
0da16248 790 fs = mfree(fs);
ac9ef333 791
ad929bcc
KS
792 r = cg_get_path(controller, NULL, "release_agent", &fs);
793 if (r < 0)
794 return r;
795
604028de 796 r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER);
ad929bcc
KS
797 if (r < 0)
798 return r;
799
ac9ef333 800 return 0;
ad929bcc
KS
801}
802
6f883237 803int cg_is_empty(const char *controller, const char *path) {
7027ff61 804 _cleanup_fclose_ FILE *f = NULL;
efdb0237 805 pid_t pid;
7027ff61 806 int r;
8c6db833 807
8c6db833
LP
808 assert(path);
809
b043cd0b 810 r = cg_enumerate_processes(controller, path, &f);
6f883237 811 if (r == -ENOENT)
1bcf3fc6 812 return true;
c3175a7f 813 if (r < 0)
6f883237 814 return r;
8c6db833 815
6f883237 816 r = cg_read_pid(f, &pid);
c6c18be3
LP
817 if (r < 0)
818 return r;
8c6db833 819
6f883237 820 return r == 0;
8c6db833
LP
821}
822
6f883237 823int cg_is_empty_recursive(const char *controller, const char *path) {
415fc41c 824 int r;
8c6db833 825
8c6db833
LP
826 assert(path);
827
6fd66507 828 /* The root cgroup is always populated */
57ea45e1 829 if (controller && empty_or_root(path))
efdb0237 830 return false;
6fd66507 831
c22800e4 832 r = cg_unified_controller(controller);
b4cccbc1
LP
833 if (r < 0)
834 return r;
835 if (r > 0) {
ab2c3861 836 _cleanup_free_ char *t = NULL;
8c6db833 837
efdb0237 838 /* On the unified hierarchy we can check empty state
ab2c3861 839 * via the "populated" attribute of "cgroup.events". */
8c6db833 840
ab2c3861 841 r = cg_read_event(controller, path, "populated", &t);
1bcf3fc6
ZJS
842 if (r == -ENOENT)
843 return true;
efdb0237
LP
844 if (r < 0)
845 return r;
846
847 return streq(t, "0");
848 } else {
849 _cleanup_closedir_ DIR *d = NULL;
850 char *fn;
8c6db833 851
efdb0237 852 r = cg_is_empty(controller, path);
35d2e7ec 853 if (r <= 0)
7027ff61 854 return r;
35d2e7ec 855
efdb0237
LP
856 r = cg_enumerate_subgroups(controller, path, &d);
857 if (r == -ENOENT)
1bcf3fc6 858 return true;
efdb0237
LP
859 if (r < 0)
860 return r;
35d2e7ec 861
efdb0237
LP
862 while ((r = cg_read_subgroup(d, &fn)) > 0) {
863 _cleanup_free_ char *p = NULL;
864
657ee2d8 865 p = path_join(path, fn);
efdb0237
LP
866 free(fn);
867 if (!p)
868 return -ENOMEM;
869
870 r = cg_is_empty_recursive(controller, p);
871 if (r <= 0)
872 return r;
873 }
874 if (r < 0)
875 return r;
876
877 return true;
878 }
35d2e7ec
LP
879}
880
881int cg_split_spec(const char *spec, char **controller, char **path) {
35d2e7ec 882 char *t = NULL, *u = NULL;
efdb0237 883 const char *e;
35d2e7ec
LP
884
885 assert(spec);
35d2e7ec
LP
886
887 if (*spec == '/') {
99be45a4 888 if (!path_is_normalized(spec))
e884315e 889 return -EINVAL;
35d2e7ec
LP
890
891 if (path) {
246aa6dd
LP
892 t = strdup(spec);
893 if (!t)
35d2e7ec
LP
894 return -ENOMEM;
895
858d36c1 896 *path = path_simplify(t, false);
8c6db833
LP
897 }
898
35d2e7ec
LP
899 if (controller)
900 *controller = NULL;
901
902 return 0;
8c6db833
LP
903 }
904
246aa6dd
LP
905 e = strchr(spec, ':');
906 if (!e) {
185a0874 907 if (!cg_controller_is_valid(spec))
35d2e7ec
LP
908 return -EINVAL;
909
910 if (controller) {
efdb0237 911 t = strdup(spec);
246aa6dd 912 if (!t)
35d2e7ec
LP
913 return -ENOMEM;
914
915 *controller = t;
916 }
917
918 if (path)
919 *path = NULL;
920
921 return 0;
8c6db833
LP
922 }
923
efdb0237 924 t = strndup(spec, e-spec);
e884315e
LP
925 if (!t)
926 return -ENOMEM;
185a0874 927 if (!cg_controller_is_valid(t)) {
e884315e 928 free(t);
35d2e7ec 929 return -EINVAL;
246aa6dd
LP
930 }
931
efdb0237
LP
932 if (isempty(e+1))
933 u = NULL;
934 else {
baa89da4
LP
935 u = strdup(e+1);
936 if (!u) {
937 free(t);
938 return -ENOMEM;
939 }
35d2e7ec 940
99be45a4 941 if (!path_is_normalized(u) ||
baa89da4
LP
942 !path_is_absolute(u)) {
943 free(t);
944 free(u);
945 return -EINVAL;
946 }
947
858d36c1 948 path_simplify(u, false);
baa89da4 949 }
5954c074 950
35d2e7ec
LP
951 if (controller)
952 *controller = t;
e884315e
LP
953 else
954 free(t);
35d2e7ec
LP
955
956 if (path)
957 *path = u;
e884315e
LP
958 else
959 free(u);
35d2e7ec
LP
960
961 return 0;
8c6db833 962}
c6c18be3 963
7027ff61 964int cg_mangle_path(const char *path, char **result) {
78edb35a
LP
965 _cleanup_free_ char *c = NULL, *p = NULL;
966 char *t;
35d2e7ec
LP
967 int r;
968
969 assert(path);
970 assert(result);
971
73e231ab 972 /* First, check if it already is a filesystem path */
7027ff61 973 if (path_startswith(path, "/sys/fs/cgroup")) {
35d2e7ec 974
b69d29ce
LP
975 t = strdup(path);
976 if (!t)
35d2e7ec
LP
977 return -ENOMEM;
978
858d36c1 979 *result = path_simplify(t, false);
35d2e7ec
LP
980 return 0;
981 }
982
73e231ab 983 /* Otherwise, treat it as cg spec */
b69d29ce
LP
984 r = cg_split_spec(path, &c, &p);
985 if (r < 0)
35d2e7ec
LP
986 return r;
987
efdb0237 988 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
35d2e7ec 989}
1f73f0f1 990
7027ff61 991int cg_get_root_path(char **path) {
9444b1f2 992 char *p, *e;
7027ff61
LP
993 int r;
994
995 assert(path);
996
9444b1f2 997 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
7027ff61
LP
998 if (r < 0)
999 return r;
1000
efdb0237
LP
1001 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1002 if (!e)
1003 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1004 if (!e)
1005 e = endswith(p, "/system"); /* even more legacy */
9444b1f2 1006 if (e)
7027ff61
LP
1007 *e = 0;
1008
1f73f0f1
LP
1009 *path = p;
1010 return 0;
1011}
b59e2465 1012
751bc6ac
LP
1013int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1014 _cleanup_free_ char *rt = NULL;
1015 char *p;
ba1261bc
LP
1016 int r;
1017
e9174f29 1018 assert(cgroup);
751bc6ac 1019 assert(shifted);
e9174f29
LP
1020
1021 if (!root) {
1022 /* If the root was specified let's use that, otherwise
1023 * let's determine it from PID 1 */
1024
751bc6ac 1025 r = cg_get_root_path(&rt);
e9174f29
LP
1026 if (r < 0)
1027 return r;
1028
751bc6ac 1029 root = rt;
e9174f29 1030 }
ba1261bc 1031
751bc6ac 1032 p = path_startswith(cgroup, root);
efdb0237 1033 if (p && p > cgroup)
751bc6ac
LP
1034 *shifted = p - 1;
1035 else
1036 *shifted = cgroup;
1037
1038 return 0;
1039}
1040
1041int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1042 _cleanup_free_ char *raw = NULL;
1043 const char *c;
1044 int r;
1045
1046 assert(pid >= 0);
1047 assert(cgroup);
1048
1049 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
7027ff61 1050 if (r < 0)
ba1261bc 1051 return r;
ba1261bc 1052
751bc6ac
LP
1053 r = cg_shift_path(raw, root, &c);
1054 if (r < 0)
1055 return r;
ba1261bc 1056
ae2a15bc
LP
1057 if (c == raw)
1058 *cgroup = TAKE_PTR(raw);
1059 else {
751bc6ac 1060 char *n;
ba1261bc 1061
751bc6ac
LP
1062 n = strdup(c);
1063 if (!n)
ba1261bc 1064 return -ENOMEM;
ba1261bc 1065
751bc6ac
LP
1066 *cgroup = n;
1067 }
ba1261bc
LP
1068
1069 return 0;
1070}
1071
9ed794a3 1072int cg_path_decode_unit(const char *cgroup, char **unit) {
8b0849e9
LP
1073 char *c, *s;
1074 size_t n;
ef1673d1
MT
1075
1076 assert(cgroup);
6c03089c 1077 assert(unit);
ef1673d1 1078
8b0849e9
LP
1079 n = strcspn(cgroup, "/");
1080 if (n < 3)
1081 return -ENXIO;
1082
1083 c = strndupa(cgroup, n);
ae018d9b 1084 c = cg_unescape(c);
ef1673d1 1085
7410616c 1086 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
cfeaa44a 1087 return -ENXIO;
ef1673d1 1088
d7bd3de0 1089 s = strdup(c);
6c03089c
LP
1090 if (!s)
1091 return -ENOMEM;
1092
1093 *unit = s;
ef1673d1
MT
1094 return 0;
1095}
1096
8b0849e9
LP
1097static bool valid_slice_name(const char *p, size_t n) {
1098
1099 if (!p)
1100 return false;
1101
fbd0b64f 1102 if (n < STRLEN("x.slice"))
8b0849e9
LP
1103 return false;
1104
1105 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1106 char buf[n+1], *c;
1107
1108 memcpy(buf, p, n);
1109 buf[n] = 0;
1110
1111 c = cg_unescape(buf);
1112
7410616c 1113 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
8b0849e9
LP
1114 }
1115
1116 return false;
1117}
1118
/* Skips over all slice assignments at the beginning of 'p' and returns a pointer to the first path
 * component that is not a valid slice name (leading slashes in front of it are skipped as well). */
static const char *skip_slices(const char *p) {
        size_t length;

        assert(p);

        do {
                p += strspn(p, "/");
                length = strcspn(p, "/");

                if (!valid_slice_name(p, length))
                        return p;

                p += length;
        } while (true);
}
1136
8b0849e9 1137int cg_path_get_unit(const char *path, char **ret) {
6c03089c 1138 const char *e;
8b0849e9
LP
1139 char *unit;
1140 int r;
6c03089c
LP
1141
1142 assert(path);
8b0849e9 1143 assert(ret);
6c03089c 1144
9444b1f2 1145 e = skip_slices(path);
6c03089c 1146
8b0849e9
LP
1147 r = cg_path_decode_unit(e, &unit);
1148 if (r < 0)
1149 return r;
1150
1151 /* We skipped over the slices, don't accept any now */
1152 if (endswith(unit, ".slice")) {
1153 free(unit);
1154 return -ENXIO;
1155 }
1156
1157 *ret = unit;
1158 return 0;
6c03089c
LP
1159}
1160
1161int cg_pid_get_unit(pid_t pid, char **unit) {
7fd1b19b 1162 _cleanup_free_ char *cgroup = NULL;
ba1261bc 1163 int r;
ba1261bc 1164
ef1673d1
MT
1165 assert(unit);
1166
7027ff61 1167 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
ef1673d1
MT
1168 if (r < 0)
1169 return r;
1170
6c03089c
LP
1171 return cg_path_get_unit(cgroup, unit);
1172}
ef1673d1 1173
d4fffc4b
ZJS
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to what follows the session scope component (with any slashes after it skipped),
 * or NULL if 'p' is empty or its first component is not a session scope with a valid session ID.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the session ID between prefix and suffix, plus the terminating NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure with NULL like everywhere
                 * else in here, not with a boolean. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1210
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the user manager component (with any slashes after it skipped),
 * or NULL if 'p' is empty or its first component is not a user manager service of a parsable UID.
 */
static const char *skip_user_manager(const char *p) {
        size_t length, uid_length;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");
        length = strcspn(p, "/");

        if (length < STRLEN("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + length - 8, ".service", 8) != 0)
                return NULL;

        /* What remains between prefix and suffix has to be the UID */
        uid_length = length - 5 - 8;

        {
                char uid[uid_length + 1];

                memcpy(uid, p + 5, uid_length);
                uid[uid_length] = 0;

                /* Note that user manager services never need unescaping, since they cannot conflict
                 * with the kernel's own names, hence we don't need to call cg_unescape() here. */

                if (parse_uid(uid, NULL) < 0)
                        return NULL;
        }

        p += length;
        p += strspn(p, "/");

        return p;
}
1248
/* Skips the part of a cgroup path that leads into a user's own hierarchy: first whatever skip_slices()
 * skips, then either the user manager service or, failing that, the session scope. Returns NULL if
 * neither of the latter two is found. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Drop leading slices, if any */
        after_slices = skip_slices(path);

        /* Prefer the user manager if it comes next; otherwise try the user session */
        after_manager = skip_user_manager(after_slices);
        return after_manager ? after_manager : skip_session(after_slices);
}
32081481 1265
329ac4bc
LP
/* Extracts the user unit from a cgroup path. Returns -ENXIO if the path does not carry a user prefix (see
 * skip_user_prefix()), otherwise the result of cg_path_get_unit() on the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);
        if (rest)
                /* Past the prefix the path is parsed just like a system unit's path */
                return cg_path_get_unit(rest, ret);

        return -ENXIO;
}
ba1261bc 1280
ef1673d1 1281int cg_pid_get_user_unit(pid_t pid, char **unit) {
7fd1b19b 1282 _cleanup_free_ char *cgroup = NULL;
6c03089c
LP
1283 int r;
1284
1285 assert(unit);
1286
7027ff61 1287 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
6c03089c
LP
1288 if (r < 0)
1289 return r;
1290
1291 return cg_path_get_user_unit(cgroup, unit);
ba1261bc 1292}
e884315e 1293
7027ff61 1294int cg_path_get_machine_name(const char *path, char **machine) {
efdb0237
LP
1295 _cleanup_free_ char *u = NULL;
1296 const char *sl;
89f7c846 1297 int r;
374ec6ab 1298
89f7c846
LP
1299 r = cg_path_get_unit(path, &u);
1300 if (r < 0)
1301 return r;
7027ff61 1302
efdb0237 1303 sl = strjoina("/run/systemd/machines/unit:", u);
89f7c846 1304 return readlink_malloc(sl, machine);
7027ff61
LP
1305}
1306
1307int cg_pid_get_machine_name(pid_t pid, char **machine) {
7fd1b19b 1308 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1309 int r;
1310
1311 assert(machine);
1312
1313 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1314 if (r < 0)
1315 return r;
1316
1317 return cg_path_get_machine_name(cgroup, machine);
1318}
1319
1320int cg_path_get_session(const char *path, char **session) {
8b0849e9
LP
1321 _cleanup_free_ char *unit = NULL;
1322 char *start, *end;
1323 int r;
7027ff61
LP
1324
1325 assert(path);
7027ff61 1326
8b0849e9
LP
1327 r = cg_path_get_unit(path, &unit);
1328 if (r < 0)
1329 return r;
7027ff61 1330
8b0849e9
LP
1331 start = startswith(unit, "session-");
1332 if (!start)
cfeaa44a 1333 return -ENXIO;
8b0849e9
LP
1334 end = endswith(start, ".scope");
1335 if (!end)
cfeaa44a 1336 return -ENXIO;
8b0849e9
LP
1337
1338 *end = 0;
1339 if (!session_id_valid(start))
cfeaa44a 1340 return -ENXIO;
374ec6ab 1341
af08d2f9 1342 if (session) {
8b0849e9 1343 char *rr;
af08d2f9 1344
8b0849e9
LP
1345 rr = strdup(start);
1346 if (!rr)
af08d2f9
LP
1347 return -ENOMEM;
1348
8b0849e9 1349 *session = rr;
af08d2f9 1350 }
7027ff61 1351
7027ff61
LP
1352 return 0;
1353}
1354
1355int cg_pid_get_session(pid_t pid, char **session) {
7fd1b19b 1356 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1357 int r;
1358
7027ff61
LP
1359 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1360 if (r < 0)
1361 return r;
1362
1363 return cg_path_get_session(cgroup, session);
1364}
1365
ae018d9b 1366int cg_path_get_owner_uid(const char *path, uid_t *uid) {
374ec6ab 1367 _cleanup_free_ char *slice = NULL;
8b0849e9 1368 char *start, *end;
374ec6ab 1369 int r;
ae018d9b
LP
1370
1371 assert(path);
ae018d9b 1372
374ec6ab
LP
1373 r = cg_path_get_slice(path, &slice);
1374 if (r < 0)
1375 return r;
ae018d9b 1376
674eb685
LP
1377 start = startswith(slice, "user-");
1378 if (!start)
cfeaa44a 1379 return -ENXIO;
8b0849e9 1380 end = endswith(start, ".slice");
674eb685 1381 if (!end)
cfeaa44a 1382 return -ENXIO;
ae018d9b 1383
8b0849e9
LP
1384 *end = 0;
1385 if (parse_uid(start, uid) < 0)
cfeaa44a 1386 return -ENXIO;
674eb685 1387
674eb685 1388 return 0;
ae018d9b
LP
1389}
1390
1391int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1392 _cleanup_free_ char *cgroup = NULL;
1393 int r;
1394
ae018d9b
LP
1395 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1396 if (r < 0)
1397 return r;
1398
1399 return cg_path_get_owner_uid(cgroup, uid);
1400}
1401
1021b21b
LP
1402int cg_path_get_slice(const char *p, char **slice) {
1403 const char *e = NULL;
1021b21b
LP
1404
1405 assert(p);
1406 assert(slice);
1407
329ac4bc
LP
1408 /* Finds the right-most slice unit from the beginning, but
1409 * stops before we come to the first non-slice unit. */
1410
1021b21b
LP
1411 for (;;) {
1412 size_t n;
1413
1414 p += strspn(p, "/");
1415
1416 n = strcspn(p, "/");
8b0849e9 1417 if (!valid_slice_name(p, n)) {
1021b21b 1418
8b0849e9
LP
1419 if (!e) {
1420 char *s;
1021b21b 1421
e5d855d3 1422 s = strdup(SPECIAL_ROOT_SLICE);
8b0849e9
LP
1423 if (!s)
1424 return -ENOMEM;
1021b21b 1425
8b0849e9
LP
1426 *slice = s;
1427 return 0;
1428 }
1429
1430 return cg_path_decode_unit(e, slice);
1021b21b
LP
1431 }
1432
1433 e = p;
1021b21b
LP
1434 p += n;
1435 }
1436}
1437
1438int cg_pid_get_slice(pid_t pid, char **slice) {
1439 _cleanup_free_ char *cgroup = NULL;
1440 int r;
1441
1442 assert(slice);
1443
1444 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1445 if (r < 0)
1446 return r;
1447
1448 return cg_path_get_slice(cgroup, slice);
1449}
1450
329ac4bc
LP
/* Determines the slice within a user's own hierarchy. Returns -ENXIO if the path has no user prefix (see
 * skip_user_prefix()); otherwise the remainder is parsed by cg_path_get_slice(), exactly like a system
 * slice. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);
        if (rest)
                return cg_path_get_slice(rest, slice);

        return -ENXIO;
}
1464
1465int cg_pid_get_user_slice(pid_t pid, char **slice) {
1466 _cleanup_free_ char *cgroup = NULL;
1467 int r;
1468
1469 assert(slice);
1470
1471 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1472 if (r < 0)
1473 return r;
1474
1475 return cg_path_get_user_slice(cgroup, slice);
1476}
1477
ae018d9b
LP
1478char *cg_escape(const char *p) {
1479 bool need_prefix = false;
1480
1481 /* This implements very minimal escaping for names to be used
1482 * as file names in the cgroup tree: any name which might
1483 * conflict with a kernel name or is prefixed with '_' is
1484 * prefixed with a '_'. That way, when reading cgroup names it
1485 * is sufficient to remove a single prefixing underscore if
1486 * there is one. */
1487
1488 /* The return value of this function (unlike cg_unescape())
1489 * needs free()! */
1490
4c701096 1491 if (IN_SET(p[0], 0, '_', '.') ||
0cbd293e 1492 STR_IN_SET(p, "notify_on_release", "release_agent", "tasks") ||
efdb0237 1493 startswith(p, "cgroup."))
ae018d9b
LP
1494 need_prefix = true;
1495 else {
1496 const char *dot;
1497
1498 dot = strrchr(p, '.');
1499 if (dot) {
efdb0237
LP
1500 CGroupController c;
1501 size_t l = dot - p;
ae018d9b 1502
efdb0237
LP
1503 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1504 const char *n;
1505
1506 n = cgroup_controller_to_string(c);
ae018d9b 1507
efdb0237
LP
1508 if (l != strlen(n))
1509 continue;
ae018d9b 1510
efdb0237
LP
1511 if (memcmp(p, n, l) != 0)
1512 continue;
1513
1514 need_prefix = true;
1515 break;
ae018d9b
LP
1516 }
1517 }
1518 }
1519
1520 if (need_prefix)
b910cc72 1521 return strjoin("_", p);
efdb0237
LP
1522
1523 return strdup(p);
ae018d9b
LP
1524}
1525
/* Reverses cg_escape(): skips one leading underscore, if there is one.
 *
 * Unlike cg_escape() nothing is allocated here: the returned pointer points into 'p' and must not be
 * freed. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
78edb35a
LP
1537
1538#define CONTROLLER_VALID \
4b549144 1539 DIGITS LETTERS \
78edb35a
LP
1540 "_"
1541
185a0874 1542bool cg_controller_is_valid(const char *p) {
78edb35a
LP
1543 const char *t, *s;
1544
1545 if (!p)
1546 return false;
1547
b6629c4b
TH
1548 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1549 return true;
1550
185a0874
DJL
1551 s = startswith(p, "name=");
1552 if (s)
1553 p = s;
78edb35a 1554
4c701096 1555 if (IN_SET(*p, 0, '_'))
78edb35a
LP
1556 return false;
1557
1558 for (t = p; *t; t++)
1559 if (!strchr(CONTROLLER_VALID, *t))
1560 return false;
1561
1562 if (t - p > FILENAME_MAX)
1563 return false;
1564
1565 return true;
1566}
a016b922
LP
1567
1568int cg_slice_to_path(const char *unit, char **ret) {
1569 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1570 const char *dash;
7410616c 1571 int r;
a016b922
LP
1572
1573 assert(unit);
1574 assert(ret);
1575
e5d855d3 1576 if (streq(unit, SPECIAL_ROOT_SLICE)) {
c96cc582
LP
1577 char *x;
1578
1579 x = strdup("");
1580 if (!x)
1581 return -ENOMEM;
1582 *ret = x;
1583 return 0;
1584 }
1585
7410616c 1586 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
a016b922
LP
1587 return -EINVAL;
1588
1589 if (!endswith(unit, ".slice"))
1590 return -EINVAL;
1591
7410616c
LP
1592 r = unit_name_to_prefix(unit, &p);
1593 if (r < 0)
1594 return r;
a016b922
LP
1595
1596 dash = strchr(p, '-');
e66e5b61
LP
1597
1598 /* Don't allow initial dashes */
1599 if (dash == p)
1600 return -EINVAL;
1601
a016b922
LP
1602 while (dash) {
1603 _cleanup_free_ char *escaped = NULL;
1604 char n[dash - p + sizeof(".slice")];
1605
989290db 1606#if HAS_FEATURE_MEMORY_SANITIZER
1c56d501 1607 /* msan doesn't instrument stpncpy, so it thinks
5238e957 1608 * n is later used uninitialized:
1c56d501
ZJS
1609 * https://github.com/google/sanitizers/issues/926
1610 */
1611 zero(n);
1612#endif
1613
e66e5b61 1614 /* Don't allow trailing or double dashes */
4c701096 1615 if (IN_SET(dash[1], 0, '-'))
c96cc582 1616 return -EINVAL;
a016b922 1617
c96cc582 1618 strcpy(stpncpy(n, p, dash - p), ".slice");
7410616c 1619 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
a016b922
LP
1620 return -EINVAL;
1621
1622 escaped = cg_escape(n);
1623 if (!escaped)
1624 return -ENOMEM;
1625
1626 if (!strextend(&s, escaped, "/", NULL))
1627 return -ENOMEM;
1628
1629 dash = strchr(dash+1, '-');
1630 }
1631
1632 e = cg_escape(unit);
1633 if (!e)
1634 return -ENOMEM;
1635
1636 if (!strextend(&s, e, NULL))
1637 return -ENOMEM;
1638
ae2a15bc 1639 *ret = TAKE_PTR(s);
a016b922
LP
1640
1641 return 0;
1642}
4ad49000
LP
1643
1644int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1645 _cleanup_free_ char *p = NULL;
1646 int r;
1647
1648 r = cg_get_path(controller, path, attribute, &p);
1649 if (r < 0)
1650 return r;
1651
604028de 1652 return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
4ad49000
LP
1653}
1654
934277fe
LP
1655int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1656 _cleanup_free_ char *p = NULL;
1657 int r;
1658
1659 r = cg_get_path(controller, path, attribute, &p);
1660 if (r < 0)
1661 return r;
1662
1663 return read_one_line_file(p, ret);
1664}
1665
b734a4ff
LP
1666int cg_get_keyed_attribute(
1667 const char *controller,
1668 const char *path,
1669 const char *attribute,
1670 char **keys,
1671 char **ret_values) {
66ebf6c0 1672
b734a4ff 1673 _cleanup_free_ char *filename = NULL, *contents = NULL;
b734a4ff 1674 const char *p;
9177fa9f 1675 size_t n, i, n_done = 0;
b734a4ff
LP
1676 char **v;
1677 int r;
1678
4e1dfa45 1679 /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
b734a4ff
LP
1680 * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
1681 * entries as 'keys'. On success each entry will be set to the value of the matching key.
1682 *
1683 * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
66ebf6c0
TH
1684
1685 r = cg_get_path(controller, path, attribute, &filename);
1686 if (r < 0)
1687 return r;
1688
b734a4ff 1689 r = read_full_file(filename, &contents, NULL);
66ebf6c0
TH
1690 if (r < 0)
1691 return r;
1692
b734a4ff
LP
1693 n = strv_length(keys);
1694 if (n == 0) /* No keys to retrieve? That's easy, we are done then */
1695 return 0;
66ebf6c0 1696
b734a4ff
LP
1697 /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
1698 v = newa0(char*, n);
66ebf6c0 1699
b734a4ff
LP
1700 for (p = contents; *p;) {
1701 const char *w = NULL;
b734a4ff 1702
9177fa9f
ZJS
1703 for (i = 0; i < n; i++)
1704 if (!v[i]) {
b734a4ff
LP
1705 w = first_word(p, keys[i]);
1706 if (w)
1707 break;
66ebf6c0 1708 }
66ebf6c0 1709
b734a4ff 1710 if (w) {
b734a4ff
LP
1711 size_t l;
1712
1713 l = strcspn(w, NEWLINE);
9177fa9f
ZJS
1714 v[i] = strndup(w, l);
1715 if (!v[i]) {
b734a4ff
LP
1716 r = -ENOMEM;
1717 goto fail;
66ebf6c0 1718 }
b734a4ff 1719
b734a4ff 1720 n_done++;
b734a4ff
LP
1721 if (n_done >= n)
1722 goto done;
1723
1724 p = w + l;
9177fa9f 1725 } else
b734a4ff 1726 p += strcspn(p, NEWLINE);
b734a4ff
LP
1727
1728 p += strspn(p, NEWLINE);
66ebf6c0
TH
1729 }
1730
b734a4ff
LP
1731 r = -ENXIO;
1732
1733fail:
1734 for (i = 0; i < n; i++)
1735 free(v[i]);
1736
1737 return r;
1738
1739done:
1740 memcpy(ret_values, v, sizeof(char*) * n);
66ebf6c0 1741 return 0;
4ad49000
LP
1742}
1743
aae7e17f 1744int cg_mask_to_string(CGroupMask mask, char **ret) {
ec635a2d
LP
1745 _cleanup_free_ char *s = NULL;
1746 size_t n = 0, allocated = 0;
1747 bool space = false;
aae7e17f 1748 CGroupController c;
aae7e17f
FB
1749
1750 assert(ret);
1751
1752 if (mask == 0) {
1753 *ret = NULL;
1754 return 0;
1755 }
1756
1757 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
ec635a2d
LP
1758 const char *k;
1759 size_t l;
aae7e17f 1760
f99850a0 1761 if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
aae7e17f
FB
1762 continue;
1763
ec635a2d
LP
1764 k = cgroup_controller_to_string(c);
1765 l = strlen(k);
1766
1767 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
1768 return -ENOMEM;
1769
1770 if (space)
1771 s[n] = ' ';
1772 memcpy(s + n + space, k, l);
1773 n += space + l;
1774
1775 space = true;
aae7e17f
FB
1776 }
1777
ec635a2d 1778 assert(s);
aae7e17f 1779
ec635a2d 1780 s[n] = 0;
ae2a15bc 1781 *ret = TAKE_PTR(s);
ec635a2d 1782
aae7e17f
FB
1783 return 0;
1784}
1785
38a90d45
LP
1786int cg_mask_from_string(const char *value, CGroupMask *ret) {
1787 CGroupMask m = 0;
1788
1789 assert(ret);
aae7e17f
FB
1790 assert(value);
1791
1792 for (;;) {
1793 _cleanup_free_ char *n = NULL;
1794 CGroupController v;
1795 int r;
1796
1797 r = extract_first_word(&value, &n, NULL, 0);
1798 if (r < 0)
1799 return r;
1800 if (r == 0)
1801 break;
1802
1803 v = cgroup_controller_from_string(n);
1804 if (v < 0)
1805 continue;
1806
38a90d45 1807 m |= CGROUP_CONTROLLER_TO_MASK(v);
aae7e17f 1808 }
38a90d45
LP
1809
1810 *ret = m;
aae7e17f
FB
1811 return 0;
1812}
1813
efdb0237 1814int cg_mask_supported(CGroupMask *ret) {
38a90d45 1815 CGroupMask mask;
415fc41c 1816 int r;
efdb0237 1817
67558d15
LP
1818 /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense of and that
1819 * are actually accessible. Only covers real controllers, i.e. not the CGROUP_CONTROLLER_BPF_xyz
1820 * pseudo-controllers. */
4ad49000 1821
b4cccbc1
LP
1822 r = cg_all_unified();
1823 if (r < 0)
1824 return r;
1825 if (r > 0) {
5f4c5fef 1826 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
efdb0237
LP
1827
1828 /* In the unified hierarchy we can read the supported
1829 * and accessible controllers from a the top-level
1830 * cgroup attribute */
1831
5f4c5fef
LP
1832 r = cg_get_root_path(&root);
1833 if (r < 0)
1834 return r;
1835
1836 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
1837 if (r < 0)
1838 return r;
1839
1840 r = read_one_line_file(path, &controllers);
efdb0237
LP
1841 if (r < 0)
1842 return r;
4ad49000 1843
aae7e17f
FB
1844 r = cg_mask_from_string(controllers, &mask);
1845 if (r < 0)
1846 return r;
efdb0237 1847
1fbbb526 1848 /* Mask controllers that are not supported in unified hierarchy. */
03afd780 1849 mask &= CGROUP_MASK_V2;
efdb0237
LP
1850
1851 } else {
1852 CGroupController c;
1853
03afd780 1854 /* In the legacy hierarchy, we check which hierarchies are mounted. */
efdb0237 1855
38a90d45 1856 mask = 0;
efdb0237 1857 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
03afd780 1858 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
efdb0237
LP
1859 const char *n;
1860
03afd780
LP
1861 if (!FLAGS_SET(CGROUP_MASK_V1, bit))
1862 continue;
1863
efdb0237
LP
1864 n = cgroup_controller_to_string(c);
1865 if (controller_is_accessible(n) >= 0)
03afd780 1866 mask |= bit;
efdb0237 1867 }
4ad49000
LP
1868 }
1869
efdb0237
LP
1870 *ret = mask;
1871 return 0;
4ad49000 1872}
b12afc8c 1873
6925a0de
LP
1874int cg_kernel_controllers(Set **ret) {
1875 _cleanup_set_free_free_ Set *controllers = NULL;
b12afc8c 1876 _cleanup_fclose_ FILE *f = NULL;
b12afc8c
LP
1877 int r;
1878
6925a0de 1879 assert(ret);
b12afc8c 1880
f09e86bc
LS
1881 /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
1882 * and controllers that aren't currently accessible (because not mounted). This does not include "name="
1883 * pseudo-controllers. */
e155a0aa 1884
6925a0de
LP
1885 controllers = set_new(&string_hash_ops);
1886 if (!controllers)
1887 return -ENOMEM;
1888
fdeea3f4
ZJS
1889 r = fopen_unlocked("/proc/cgroups", "re", &f);
1890 if (r == -ENOENT) {
1891 *ret = NULL;
1892 return 0;
b12afc8c 1893 }
fdeea3f4
ZJS
1894 if (r < 0)
1895 return r;
35bbbf85 1896
b12afc8c 1897 /* Ignore the header line */
2351e44d 1898 (void) read_line(f, (size_t) -1, NULL);
b12afc8c
LP
1899
1900 for (;;) {
1901 char *controller;
1902 int enabled = 0;
1903
1904 errno = 0;
1905 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
1906
1907 if (feof(f))
1908 break;
1909
66855de7
LP
1910 if (ferror(f))
1911 return errno_or_else(EIO);
b12afc8c
LP
1912
1913 return -EBADMSG;
1914 }
1915
1916 if (!enabled) {
1917 free(controller);
1918 continue;
1919 }
1920
efdb0237 1921 if (!cg_controller_is_valid(controller)) {
b12afc8c
LP
1922 free(controller);
1923 return -EBADMSG;
1924 }
1925
1926 r = set_consume(controllers, controller);
1927 if (r < 0)
1928 return r;
1929 }
1930
1cc6c93a 1931 *ret = TAKE_PTR(controllers);
6925a0de 1932
b12afc8c
LP
1933 return 0;
1934}
efdb0237 1935
d4d99bc6
ZJS
/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on
 * /sys/fs/cgroup/systemd. This unfortunately broke other tools (such as docker) which expected the v1
 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd. From v233 on, the hybrid mode mounts v2 on
 * /sys/fs/cgroup/unified and maintains the "name=systemd" hierarchy on /sys/fs/cgroup/systemd for
 * compatibility with other tools.
 *
 * To keep live upgrades working, we detect and support the v232 layout. When the v232 layout is detected,
 * to keep cgroup v2 process management but disable the compat dual layout, we return true on
 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and false on cg_hybrid_unified().
 */
1946static thread_local bool unified_systemd_v232;
1947
d4d99bc6
ZJS
1948int cg_unified_cached(bool flush) {
1949 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
efdb0237 1950
efdb0237
LP
1951 struct statfs fs;
1952
1953 /* Checks if we support the unified hierarchy. Returns an
1954 * error when the cgroup hierarchies aren't mounted yet or we
1955 * have any other trouble determining if the unified hierarchy
1956 * is supported. */
1957
d4d99bc6
ZJS
1958 if (flush)
1959 unified_cache = CGROUP_UNIFIED_UNKNOWN;
1960 else if (unified_cache >= CGROUP_UNIFIED_NONE)
1961 return unified_cache;
efdb0237
LP
1962
1963 if (statfs("/sys/fs/cgroup/", &fs) < 0)
c028bed1 1964 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
efdb0237 1965
9aa21133
ZJS
1966 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
1967 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
5da38d07 1968 unified_cache = CGROUP_UNIFIED_ALL;
9aa21133 1969 } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2977724b 1970 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
f08e9287 1971 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
9aa21133 1972 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2977724b 1973 unified_cache = CGROUP_UNIFIED_SYSTEMD;
f08e9287 1974 unified_systemd_v232 = false;
f08e9287 1975 } else {
2977724b 1976 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
9aa21133 1977 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
5535d8f7
EV
1978
1979 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
1980 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
1981 unified_cache = CGROUP_UNIFIED_SYSTEMD;
1982 unified_systemd_v232 = true;
1983 } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
1984 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
1985 unified_cache = CGROUP_UNIFIED_NONE;
1986 } else {
1987 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
9aa21133 1988 (unsigned long long) fs.f_type);
5535d8f7 1989 unified_cache = CGROUP_UNIFIED_NONE;
9aa21133 1990 }
2977724b 1991 }
baaa35ad
ZJS
1992 } else
1993 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
1994 "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
1995 (unsigned long long)fs.f_type);
efdb0237 1996
d4d99bc6 1997 return unified_cache;
5da38d07
TH
1998}
1999
c22800e4 2000int cg_unified_controller(const char *controller) {
b4cccbc1 2001 int r;
5da38d07 2002
d4d99bc6 2003 r = cg_unified_cached(false);
b4cccbc1
LP
2004 if (r < 0)
2005 return r;
5da38d07 2006
d4d99bc6 2007 if (r == CGROUP_UNIFIED_NONE)
fc9ae717
LP
2008 return false;
2009
d4d99bc6 2010 if (r >= CGROUP_UNIFIED_ALL)
fc9ae717
LP
2011 return true;
2012
2013 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
5da38d07
TH
2014}
2015
b4cccbc1 2016int cg_all_unified(void) {
4bb652ac
LP
2017 int r;
2018
d4d99bc6 2019 r = cg_unified_cached(false);
4bb652ac
LP
2020 if (r < 0)
2021 return r;
2022
d4d99bc6 2023 return r >= CGROUP_UNIFIED_ALL;
efdb0237
LP
2024}
2025
b4cccbc1
LP
2026int cg_hybrid_unified(void) {
2027 int r;
2977724b 2028
d4d99bc6 2029 r = cg_unified_cached(false);
b4cccbc1
LP
2030 if (r < 0)
2031 return r;
2977724b 2032
d4d99bc6 2033 return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
13c31542
TH
2034}
2035
9be57249
TH
2036const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2037 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2038 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
ac06a0cf
TH
2039 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2040 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
9be57249
TH
2041};
2042
2043static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2044 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2045 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
ac06a0cf
TH
2046 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2047 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
9be57249
TH
2048};
2049
2050DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2051
f0bef277
EV
2052bool is_cgroup_fs(const struct statfs *s) {
2053 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2054 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2055}
2056
/* Returns true if the file descriptor refers to something on a cgroup (v1 or v2) file system.
 *
 * This function returns bool, so it cannot report errors: if fstatfs() fails we return false, since we
 * could not establish that this is a cgroup file system. (Returning -errno here would be converted to
 * 'true'.) */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2065
b82f71c7 2066static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
efdb0237
LP
2067 [CGROUP_CONTROLLER_CPU] = "cpu",
2068 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
047f5d63 2069 [CGROUP_CONTROLLER_CPUSET] = "cpuset",
13c31542 2070 [CGROUP_CONTROLLER_IO] = "io",
efdb0237
LP
2071 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2072 [CGROUP_CONTROLLER_MEMORY] = "memory",
3905f127 2073 [CGROUP_CONTROLLER_DEVICES] = "devices",
03a7b521 2074 [CGROUP_CONTROLLER_PIDS] = "pids",
17f14955 2075 [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
084c7007 2076 [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
efdb0237
LP
2077};
2078
2079DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
f98c2585
CD
2080
2081CGroupMask get_cpu_accounting_mask(void) {
2082 static CGroupMask needed_mask = (CGroupMask) -1;
2083
2084 /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
2085 * provided externally from the CPU controller, which means we don't
2086 * need to enable the CPU controller just to get metrics. This is good,
2087 * because enabling the CPU controller comes at a minor performance
2088 * hit, especially when it's propagated deep into large hierarchies.
2089 * There's also no separate CPU accounting controller available within
2090 * a unified hierarchy.
2091 *
2092 * This combination of factors results in the desired cgroup mask to
2093 * enable for CPU accounting varying as follows:
2094 *
2095 * ╔═════════════════════╤═════════════════════╗
2096 * ║ Linux ≥4.15 │ Linux <4.15 ║
2097 * ╔═══════════════╬═════════════════════╪═════════════════════╣
2098 * ║ Unified ║ nothing │ CGROUP_MASK_CPU ║
2099 * ╟───────────────╫─────────────────────┼─────────────────────╢
2100 * ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
2101 * ╚═══════════════╩═════════════════════╧═════════════════════╝
2102 *
2103 * We check kernel version here instead of manually checking whether
2104 * cpu.stat is present for every cgroup, as that check in itself would
2105 * already be fairly expensive.
2106 *
2107 * Kernels where this patch has been backported will therefore have the
2108 * CPU controller enabled unnecessarily. This is more expensive than
2109 * necessary, but harmless. ☺️
2110 */
2111
2112 if (needed_mask == (CGroupMask) -1) {
2113 if (cg_all_unified()) {
2114 struct utsname u;
2115 assert_se(uname(&u) >= 0);
2116
2117 if (str_verscmp(u.release, "4.15") < 0)
2118 needed_mask = CGROUP_MASK_CPU;
2119 else
2120 needed_mask = 0;
2121 } else
2122 needed_mask = CGROUP_MASK_CPUACCT;
2123 }
2124
2125 return needed_mask;
2126}
2127
2128bool cpu_accounting_is_cheap(void) {
2129 return get_cpu_accounting_mask() == 0;
2130}