]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/cgroup-util.c
basic/cgroup-util: introduce cg_get_keyed_attribute_full()
[thirdparty/systemd.git] / src / basic / cgroup-util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
8c6db833
LP
2
3#include <errno.h>
84ac7bea 4#include <ftw.h>
11c3a366 5#include <limits.h>
8c6db833 6#include <signal.h>
11c3a366 7#include <stddef.h>
8c6db833 8#include <stdlib.h>
672c48cc 9#include <sys/types.h>
f98c2585 10#include <sys/utsname.h>
4b58153d 11#include <sys/xattr.h>
84ac7bea 12#include <unistd.h>
8c6db833 13
b5efdb8a 14#include "alloc-util.h"
3ffd4af2 15#include "cgroup-util.h"
93cc7779 16#include "def.h"
a0956174 17#include "dirent-util.h"
84ac7bea 18#include "extract-word.h"
3ffd4af2 19#include "fd-util.h"
84ac7bea 20#include "fileio.h"
f97b34a6 21#include "format-util.h"
f4f15635 22#include "fs-util.h"
93cc7779 23#include "log.h"
84ac7bea
LP
24#include "login-util.h"
25#include "macro.h"
f5947a5e 26#include "missing_magic.h"
84ac7bea 27#include "mkdir.h"
6bedfcbb 28#include "parse-util.h"
9eb977db 29#include "path-util.h"
84ac7bea
LP
30#include "process-util.h"
31#include "set.h"
9444b1f2 32#include "special.h"
872a590e 33#include "stat-util.h"
d054f0a4 34#include "stdio-util.h"
8b43440b 35#include "string-table.h"
07630cea 36#include "string-util.h"
aae7e17f 37#include "strv.h"
84ac7bea 38#include "unit-name.h"
b1d4f8e1 39#include "user-util.h"
baa358df 40#include "xattr-util.h"
8c6db833 41
e48fcfef 42static int cg_enumerate_items(const char *controller, const char *path, FILE **_f, const char *item) {
7027ff61 43 _cleanup_free_ char *fs = NULL;
c6c18be3 44 FILE *f;
7027ff61 45 int r;
c6c18be3 46
c6c18be3
LP
47 assert(_f);
48
e48fcfef 49 r = cg_get_path(controller, path, item, &fs);
c3175a7f 50 if (r < 0)
c6c18be3
LP
51 return r;
52
53 f = fopen(fs, "re");
c6c18be3
LP
54 if (!f)
55 return -errno;
56
57 *_f = f;
58 return 0;
59}
60
e48fcfef
TM
/* Opens the "cgroup.procs" attribute of the specified cgroup for reading; see cg_enumerate_items()
 * for the meaning of the arguments and of the return value. */
int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
        const char *attribute = "cgroup.procs";

        return cg_enumerate_items(controller, path, _f, attribute);
}
64
c6c18be3
LP
/* Reads the next PID from a stream previously opened on a cgroup process list.
 *
 * Returns 1 and stores the PID in *_pid if one was read, 0 on end of file, and a negative
 * errno-style value on failure. A value that is zero, or that does not survive the conversion to
 * pid_t unchanged, is rejected with -EIO instead of being silently truncated.
 *
 * Note that the cgroup.procs might contain duplicates! See cgroups.txt for details. */
int cg_read_pid(FILE *f, pid_t *_pid) {
        unsigned long ul;
        pid_t pid;

        assert(f);
        assert(_pid);

        /* Reset errno so that a parse failure which does not set it can be told apart below. */
        errno = 0;
        if (fscanf(f, "%lu", &ul) != 1) {

                if (feof(f))
                        return 0;

                /* NOTE(review): presumably yields -errno if errno is set and -EIO otherwise — confirm
                 * against the definition of errno_or_else(). */
                return errno_or_else(EIO);
        }

        /* Round-trip through pid_t: anything that is not a positive PID representable in pid_t is
         * refused rather than wrapped into some unrelated (possibly negative) value. */
        pid = (pid_t) ul;
        if (pid <= 0 || (unsigned long) pid != ul)
                return -EIO;

        *_pid = pid;
        return 1;
}
89
8b238b13
LP
90int cg_read_event(
91 const char *controller,
92 const char *path,
93 const char *event,
31a9be23 94 char **ret) {
8b238b13 95
ab2c3861 96 _cleanup_free_ char *events = NULL, *content = NULL;
ab2c3861
TH
97 int r;
98
99 r = cg_get_path(controller, path, "cgroup.events", &events);
100 if (r < 0)
101 return r;
102
103 r = read_full_file(events, &content, NULL);
104 if (r < 0)
105 return r;
106
31a9be23
YW
107 for (const char *p = content;;) {
108 _cleanup_free_ char *line = NULL, *key = NULL, *val = NULL;
109 const char *q;
110
111 r = extract_first_word(&p, &line, "\n", 0);
112 if (r < 0)
113 return r;
114 if (r == 0)
115 return -ENOENT;
116
117 q = line;
118 r = extract_first_word(&q, &key, " ", 0);
119 if (r < 0)
120 return r;
121 if (r == 0)
ab2c3861
TH
122 return -EINVAL;
123
31a9be23 124 if (!streq(key, event))
ab2c3861
TH
125 continue;
126
31a9be23
YW
127 val = strdup(q);
128 if (!val)
129 return -ENOMEM;
130
131 *ret = TAKE_PTR(val);
ab2c3861
TH
132 return 0;
133 }
ab2c3861
TH
134}
135
3228995c
CB
136bool cg_ns_supported(void) {
137 static thread_local int enabled = -1;
138
139 if (enabled >= 0)
140 return enabled;
141
0887fa71
LP
142 if (access("/proc/self/ns/cgroup", F_OK) < 0) {
143 if (errno != ENOENT)
144 log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
145 enabled = false;
146 } else
147 enabled = true;
3228995c
CB
148
149 return enabled;
150}
151
35d2e7ec 152int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
7027ff61 153 _cleanup_free_ char *fs = NULL;
35d2e7ec
LP
154 int r;
155 DIR *d;
156
35d2e7ec
LP
157 assert(_d);
158
159 /* This is not recursive! */
160
c3175a7f
LP
161 r = cg_get_path(controller, path, NULL, &fs);
162 if (r < 0)
35d2e7ec
LP
163 return r;
164
165 d = opendir(fs);
35d2e7ec
LP
166 if (!d)
167 return -errno;
168
169 *_d = d;
170 return 0;
171}
172
173int cg_read_subgroup(DIR *d, char **fn) {
174 struct dirent *de;
175
176 assert(d);
7027ff61 177 assert(fn);
35d2e7ec 178
f01327ad 179 FOREACH_DIRENT_ALL(de, d, return -errno) {
35d2e7ec
LP
180 char *b;
181
182 if (de->d_type != DT_DIR)
183 continue;
184
49bfc877 185 if (dot_or_dot_dot(de->d_name))
35d2e7ec
LP
186 continue;
187
7027ff61
LP
188 b = strdup(de->d_name);
189 if (!b)
35d2e7ec
LP
190 return -ENOMEM;
191
192 *fn = b;
193 return 1;
194 }
195
35d2e7ec
LP
196 return 0;
197}
198
4ad49000 199int cg_rmdir(const char *controller, const char *path) {
7027ff61 200 _cleanup_free_ char *p = NULL;
35d2e7ec
LP
201 int r;
202
ad293f5a
LP
203 r = cg_get_path(controller, path, NULL, &p);
204 if (r < 0)
35d2e7ec
LP
205 return r;
206
207 r = rmdir(p);
7027ff61
LP
208 if (r < 0 && errno != ENOENT)
209 return -errno;
35d2e7ec 210
b4cccbc1 211 r = cg_hybrid_unified();
f20db199 212 if (r <= 0)
b4cccbc1 213 return r;
b4cccbc1
LP
214
215 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
216 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
217 if (r < 0)
218 log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
219 }
220
7027ff61 221 return 0;
35d2e7ec
LP
222}
223
e48fcfef 224static int cg_kill_items(
1d98fef1
LP
225 const char *controller,
226 const char *path,
227 int sig,
228 CGroupFlags flags,
229 Set *s,
230 cg_kill_log_func_t log_kill,
e48fcfef
TM
231 void *userdata,
232 const char *item) {
1d98fef1 233
7027ff61 234 _cleanup_set_free_ Set *allocated_set = NULL;
35d2e7ec 235 bool done = false;
c53d2d54 236 int r, ret = 0, ret_log_kill = 0;
35d2e7ec 237 pid_t my_pid;
8c6db833 238
8c6db833
LP
239 assert(sig >= 0);
240
0d5b4810
LP
241 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
242 * SIGCONT on SIGKILL. */
243 if (IN_SET(sig, SIGCONT, SIGKILL))
244 flags &= ~CGROUP_SIGCONT;
245
8c6db833
LP
246 /* This goes through the tasks list and kills them all. This
247 * is repeated until no further processes are added to the
248 * tasks list, to properly handle forking processes */
249
7027ff61 250 if (!s) {
d5099efc 251 s = allocated_set = set_new(NULL);
7027ff61 252 if (!s)
ca949c9d 253 return -ENOMEM;
7027ff61 254 }
8c6db833 255
df0ff127 256 my_pid = getpid_cached();
8c6db833
LP
257
258 do {
7027ff61 259 _cleanup_fclose_ FILE *f = NULL;
0b172489 260 pid_t pid = 0;
8c6db833
LP
261 done = true;
262
e48fcfef 263 r = cg_enumerate_items(controller, path, &f, item);
7027ff61 264 if (r < 0) {
4c633005 265 if (ret >= 0 && r != -ENOENT)
7027ff61 266 return r;
35d2e7ec 267
7027ff61 268 return ret;
35d2e7ec 269 }
c6c18be3
LP
270
271 while ((r = cg_read_pid(f, &pid)) > 0) {
8c6db833 272
1d98fef1 273 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
c6c18be3 274 continue;
8c6db833 275
fea72cc0 276 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
c6c18be3 277 continue;
8c6db833 278
1d98fef1 279 if (log_kill)
c53d2d54 280 ret_log_kill = log_kill(pid, sig, userdata);
1d98fef1 281
8c6db833
LP
282 /* If we haven't killed this process yet, kill
283 * it */
4c633005
LP
284 if (kill(pid, sig) < 0) {
285 if (ret >= 0 && errno != ESRCH)
8c6db833 286 ret = -errno;
6e8314c4 287 } else {
1d98fef1 288 if (flags & CGROUP_SIGCONT)
e155a0aa 289 (void) kill(pid, SIGCONT);
430c18ed 290
c53d2d54
DB
291 if (ret == 0) {
292 if (log_kill)
293 ret = ret_log_kill;
294 else
295 ret = 1;
296 }
430c18ed 297 }
8c6db833 298
8c6db833
LP
299 done = false;
300
fea72cc0 301 r = set_put(s, PID_TO_PTR(pid));
7027ff61 302 if (r < 0) {
35d2e7ec 303 if (ret >= 0)
7027ff61 304 return r;
35d2e7ec 305
7027ff61 306 return ret;
35d2e7ec
LP
307 }
308 }
309
310 if (r < 0) {
311 if (ret >= 0)
7027ff61 312 return r;
35d2e7ec 313
7027ff61 314 return ret;
8c6db833
LP
315 }
316
8c6db833
LP
317 /* To avoid racing against processes which fork
318 * quicker than we can kill them we repeat this until
319 * no new pids need to be killed. */
320
35d2e7ec 321 } while (!done);
8c6db833 322
35d2e7ec 323 return ret;
8c6db833
LP
324}
325
e48fcfef
TM
326int cg_kill(
327 const char *controller,
328 const char *path,
329 int sig,
330 CGroupFlags flags,
331 Set *s,
332 cg_kill_log_func_t log_kill,
333 void *userdata) {
334 int r;
335
336 r = cg_kill_items(controller, path, sig, flags, s, log_kill, userdata, "cgroup.procs");
337 if (r < 0 || sig != SIGKILL)
338 return r;
339
340 /* Only in case of killing with SIGKILL and when using cgroupsv2, kill remaining threads manually as
cda5ccdb
TM
341 a workaround for kernel bug. It was fixed in 5.2-rc5 (c03cd7738a83), backported to 4.19.66
342 (4340d175b898) and 4.14.138 (feb6b123b7dd). */
e48fcfef 343 r = cg_unified_controller(controller);
38288f0b 344 if (r <= 0)
e48fcfef 345 return r;
e48fcfef
TM
346
347 return cg_kill_items(controller, path, sig, flags, s, log_kill, userdata, "cgroup.threads");
348}
349
1d98fef1
LP
350int cg_kill_recursive(
351 const char *controller,
352 const char *path,
353 int sig,
354 CGroupFlags flags,
355 Set *s,
356 cg_kill_log_func_t log_kill,
357 void *userdata) {
358
7027ff61
LP
359 _cleanup_set_free_ Set *allocated_set = NULL;
360 _cleanup_closedir_ DIR *d = NULL;
e155a0aa 361 int r, ret;
35d2e7ec 362 char *fn;
8c6db833
LP
363
364 assert(path);
8c6db833
LP
365 assert(sig >= 0);
366
7027ff61 367 if (!s) {
d5099efc 368 s = allocated_set = set_new(NULL);
7027ff61 369 if (!s)
ca949c9d 370 return -ENOMEM;
7027ff61 371 }
ca949c9d 372
1d98fef1 373 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
8c6db833 374
7027ff61
LP
375 r = cg_enumerate_subgroups(controller, path, &d);
376 if (r < 0) {
4c633005 377 if (ret >= 0 && r != -ENOENT)
7027ff61 378 return r;
8c6db833 379
7027ff61 380 return ret;
35d2e7ec 381 }
8c6db833 382
35d2e7ec 383 while ((r = cg_read_subgroup(d, &fn)) > 0) {
7027ff61 384 _cleanup_free_ char *p = NULL;
8c6db833 385
95b21cff 386 p = path_join(empty_to_root(path), fn);
35d2e7ec 387 free(fn);
7027ff61
LP
388 if (!p)
389 return -ENOMEM;
8c6db833 390
1d98fef1 391 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
e155a0aa 392 if (r != 0 && ret >= 0)
35d2e7ec 393 ret = r;
8c6db833 394 }
7027ff61 395 if (ret >= 0 && r < 0)
35d2e7ec
LP
396 ret = r;
397
1d98fef1 398 if (flags & CGROUP_REMOVE) {
4ad49000 399 r = cg_rmdir(controller, path);
4c701096 400 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
7027ff61
LP
401 return r;
402 }
ca949c9d 403
8c6db833
LP
404 return ret;
405}
406
efdb0237
LP
407static const char *controller_to_dirname(const char *controller) {
408 const char *e;
3474ae3c 409
7027ff61
LP
410 assert(controller);
411
efdb0237
LP
412 /* Converts a controller name to the directory name below
413 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
414 * just cuts off the name= prefixed used for named
415 * hierarchies, if it is specified. */
416
2977724b 417 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
b4cccbc1 418 if (cg_hybrid_unified() > 0)
2977724b
TH
419 controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
420 else
421 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
422 }
b6629c4b 423
efdb0237
LP
424 e = startswith(controller, "name=");
425 if (e)
426 return e;
427
428 return controller;
3474ae3c
LP
429}
430
569b19d8
LP
431static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
432 const char *dn;
018ef268 433 char *t = NULL;
3474ae3c 434
efdb0237 435 assert(fs);
569b19d8
LP
436 assert(controller);
437
438 dn = controller_to_dirname(controller);
efdb0237
LP
439
440 if (isempty(path) && isempty(suffix))
657ee2d8 441 t = path_join("/sys/fs/cgroup", dn);
efdb0237 442 else if (isempty(path))
657ee2d8 443 t = path_join("/sys/fs/cgroup", dn, suffix);
efdb0237 444 else if (isempty(suffix))
657ee2d8 445 t = path_join("/sys/fs/cgroup", dn, path);
efdb0237 446 else
657ee2d8 447 t = path_join("/sys/fs/cgroup", dn, path, suffix);
efdb0237
LP
448 if (!t)
449 return -ENOMEM;
3474ae3c 450
efdb0237
LP
451 *fs = t;
452 return 0;
453}
454
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]" for the unified layout, leaving out 'path' and/or
 * 'suffix' when they are NULL or empty. Stores the newly allocated string in *fs and returns 0, or
 * returns -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        bool has_path, has_suffix;
        char *joined;

        assert(fs);

        has_path = !isempty(path);
        has_suffix = !isempty(suffix);

        if (has_path && has_suffix)
                joined = path_join("/sys/fs/cgroup", path, suffix);
        else if (has_path)
                joined = path_join("/sys/fs/cgroup", path);
        else if (has_suffix)
                joined = path_join("/sys/fs/cgroup", suffix);
        else
                joined = strdup("/sys/fs/cgroup");
        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
474
/* Computes the path of the cgroup 'path' (optionally extended by the attribute name 'suffix') for
 * the given controller and stores it, newly allocated and simplified, in *fs.
 *
 * With a controller, the name is validated and the result is a file system path below
 * /sys/fs/cgroup, built for the unified or the legacy layout depending on cg_all_unified(). Without
 * a controller, the result is just 'path' and 'suffix' joined, i.e. the path *below* the
 * controllers without any prefix; at least one of the two must then be given.
 *
 * Returns 0 on success, -EINVAL for an invalid controller or missing arguments, -ENOMEM on
 * allocation failure, or the error reported by cg_all_unified(). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        char *joined;
        int r;

        assert(fs);

        if (controller) {
                if (!cg_controller_is_valid(controller))
                        return -EINVAL;

                r = cg_all_unified();
                if (r < 0)
                        return r;

                r = r > 0 ?
                        join_path_unified(path, suffix, fs) :
                        join_path_legacy(controller, path, suffix, fs);
                if (r < 0)
                        return r;

                path_simplify(*fs, false);
                return 0;
        }

        /* If no controller is specified, we return the path *below* the controllers, without any
         * prefix. */
        if (!path && !suffix)
                return -EINVAL;

        if (path && suffix)
                joined = path_join(path, suffix);
        else
                joined = strdup(path ?: suffix);
        if (!joined)
                return -ENOMEM;

        *fs = path_simplify(joined, false);
        return 0;
}
dbd821ac 518
efdb0237 519static int controller_is_accessible(const char *controller) {
b4cccbc1 520 int r;
37099707 521
efdb0237 522 assert(controller);
37099707 523
efdb0237
LP
524 /* Checks whether a specific controller is accessible,
525 * i.e. its hierarchy mounted. In the unified hierarchy all
526 * controllers are considered accessible, except for the named
527 * hierarchies */
b12afc8c 528
efdb0237
LP
529 if (!cg_controller_is_valid(controller))
530 return -EINVAL;
531
b4cccbc1
LP
532 r = cg_all_unified();
533 if (r < 0)
534 return r;
535 if (r > 0) {
efdb0237
LP
536 /* We don't support named hierarchies if we are using
537 * the unified hierarchy. */
538
539 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
540 return 0;
541
542 if (startswith(controller, "name="))
543 return -EOPNOTSUPP;
544
545 } else {
546 const char *cc, *dn;
547
548 dn = controller_to_dirname(controller);
549 cc = strjoina("/sys/fs/cgroup/", dn);
550
551 if (laccess(cc, F_OK) < 0)
552 return -errno;
553 }
37099707
LP
554
555 return 0;
556}
557
/* Like cg_get_path(), but first verifies via controller_is_accessible() that the controller can
 * actually be used, and fails with that check's error if not. A controller must be specified. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        /* Check if the specified controller is actually accessible */
        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
571
4b58153d
LP
572int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
573 _cleanup_free_ char *fs = NULL;
574 int r;
575
576 assert(path);
577 assert(name);
578 assert(value || size <= 0);
579
580 r = cg_get_path(controller, path, NULL, &fs);
581 if (r < 0)
582 return r;
583
584 if (setxattr(fs, name, value, size, flags) < 0)
585 return -errno;
586
587 return 0;
588}
589
590int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
591 _cleanup_free_ char *fs = NULL;
592 ssize_t n;
593 int r;
594
595 assert(path);
596 assert(name);
597
598 r = cg_get_path(controller, path, NULL, &fs);
599 if (r < 0)
600 return r;
601
602 n = getxattr(fs, name, value, size);
603 if (n < 0)
604 return -errno;
605
606 return (int) n;
607}
608
baa358df
AZ
609int cg_get_xattr_malloc(const char *controller, const char *path, const char *name, char **ret) {
610 _cleanup_free_ char *fs = NULL;
611 int r;
612
613 assert(path);
614 assert(name);
615
616 r = cg_get_path(controller, path, NULL, &fs);
617 if (r < 0)
618 return r;
619
620 r = getxattr_malloc(fs, name, ret, false);
621 if (r < 0)
622 return r;
623
624 return r;
625}
626
bf25f165
LP
627int cg_remove_xattr(const char *controller, const char *path, const char *name) {
628 _cleanup_free_ char *fs = NULL;
629 int r;
630
631 assert(path);
632 assert(name);
633
634 r = cg_get_path(controller, path, NULL, &fs);
635 if (r < 0)
636 return r;
637
638 if (removexattr(fs, name) < 0)
639 return -errno;
640
641 return 0;
642}
643
7027ff61 644int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
7027ff61 645 _cleanup_fclose_ FILE *f = NULL;
b6629c4b 646 const char *fs, *controller_str;
d2b39cb6 647 int unified, r;
efdb0237 648 size_t cs = 0;
8c6db833 649
8c6db833 650 assert(path);
c6c18be3 651 assert(pid >= 0);
8c6db833 652
5da38d07
TH
653 if (controller) {
654 if (!cg_controller_is_valid(controller))
655 return -EINVAL;
656 } else
657 controller = SYSTEMD_CGROUP_CONTROLLER;
658
c22800e4 659 unified = cg_unified_controller(controller);
b4cccbc1
LP
660 if (unified < 0)
661 return unified;
662 if (unified == 0) {
b6629c4b
TH
663 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
664 controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
665 else
666 controller_str = controller;
667
668 cs = strlen(controller_str);
669 }
7027ff61 670
b68fa010 671 fs = procfs_file_alloca(pid, "cgroup");
fdeea3f4
ZJS
672 r = fopen_unlocked(fs, "re", &f);
673 if (r == -ENOENT)
674 return -ESRCH;
675 if (r < 0)
676 return r;
35bbbf85 677
d2b39cb6
LP
678 for (;;) {
679 _cleanup_free_ char *line = NULL;
efdb0237 680 char *e, *p;
c6c18be3 681
d2b39cb6
LP
682 r = read_line(f, LONG_LINE_MAX, &line);
683 if (r < 0)
684 return r;
685 if (r == 0)
686 break;
c6c18be3 687
efdb0237
LP
688 if (unified) {
689 e = startswith(line, "0:");
690 if (!e)
691 continue;
c6c18be3 692
efdb0237
LP
693 e = strchr(e, ':');
694 if (!e)
695 continue;
696 } else {
697 char *l;
698 size_t k;
699 const char *word, *state;
700 bool found = false;
701
702 l = strchr(line, ':');
703 if (!l)
704 continue;
8af8afd6 705
efdb0237
LP
706 l++;
707 e = strchr(l, ':');
708 if (!e)
709 continue;
8af8afd6 710
efdb0237 711 *e = 0;
00d4b1e6 712 FOREACH_WORD_SEPARATOR(word, k, l, ",", state)
b6629c4b 713 if (k == cs && memcmp(word, controller_str, cs) == 0) {
efdb0237
LP
714 found = true;
715 break;
716 }
efdb0237
LP
717 if (!found)
718 continue;
8af8afd6
LP
719 }
720
8af8afd6 721 p = strdup(e + 1);
7027ff61
LP
722 if (!p)
723 return -ENOMEM;
c6c18be3 724
5e20b0a4
LP
725 /* Truncate suffix indicating the process is a zombie */
726 e = endswith(p, " (deleted)");
727 if (e)
728 *e = 0;
729
c6c18be3 730 *path = p;
7027ff61 731 return 0;
c6c18be3
LP
732 }
733
1c80e425 734 return -ENODATA;
8c6db833
LP
735}
736
737int cg_install_release_agent(const char *controller, const char *agent) {
7027ff61 738 _cleanup_free_ char *fs = NULL, *contents = NULL;
efdb0237 739 const char *sc;
415fc41c 740 int r;
8c6db833 741
8c6db833
LP
742 assert(agent);
743
c22800e4 744 r = cg_unified_controller(controller);
b4cccbc1
LP
745 if (r < 0)
746 return r;
747 if (r > 0) /* doesn't apply to unified hierarchy */
efdb0237
LP
748 return -EOPNOTSUPP;
749
7027ff61
LP
750 r = cg_get_path(controller, NULL, "release_agent", &fs);
751 if (r < 0)
c6c18be3 752 return r;
8c6db833 753
7027ff61
LP
754 r = read_one_line_file(fs, &contents);
755 if (r < 0)
756 return r;
8c6db833
LP
757
758 sc = strstrip(contents);
e155a0aa 759 if (isempty(sc)) {
604028de 760 r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
574d5f2d 761 if (r < 0)
7027ff61 762 return r;
b8725df8 763 } else if (!path_equal(sc, agent))
7027ff61 764 return -EEXIST;
8c6db833 765
0da16248 766 fs = mfree(fs);
7027ff61
LP
767 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
768 if (r < 0)
769 return r;
8c6db833 770
0da16248 771 contents = mfree(contents);
7027ff61
LP
772 r = read_one_line_file(fs, &contents);
773 if (r < 0)
774 return r;
8c6db833
LP
775
776 sc = strstrip(contents);
8c6db833 777 if (streq(sc, "0")) {
604028de 778 r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
7027ff61
LP
779 if (r < 0)
780 return r;
c6c18be3 781
7027ff61
LP
782 return 1;
783 }
8c6db833 784
7027ff61
LP
785 if (!streq(sc, "1"))
786 return -EIO;
8c6db833 787
7027ff61 788 return 0;
8c6db833
LP
789}
790
ad929bcc
KS
791int cg_uninstall_release_agent(const char *controller) {
792 _cleanup_free_ char *fs = NULL;
415fc41c 793 int r;
efdb0237 794
c22800e4 795 r = cg_unified_controller(controller);
b4cccbc1
LP
796 if (r < 0)
797 return r;
798 if (r > 0) /* Doesn't apply to unified hierarchy */
efdb0237 799 return -EOPNOTSUPP;
ad929bcc 800
ac9ef333
LP
801 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
802 if (r < 0)
803 return r;
804
604028de 805 r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
ac9ef333
LP
806 if (r < 0)
807 return r;
808
0da16248 809 fs = mfree(fs);
ac9ef333 810
ad929bcc
KS
811 r = cg_get_path(controller, NULL, "release_agent", &fs);
812 if (r < 0)
813 return r;
814
604028de 815 r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER);
ad929bcc
KS
816 if (r < 0)
817 return r;
818
ac9ef333 819 return 0;
ad929bcc
KS
820}
821
6f883237 822int cg_is_empty(const char *controller, const char *path) {
7027ff61 823 _cleanup_fclose_ FILE *f = NULL;
efdb0237 824 pid_t pid;
7027ff61 825 int r;
8c6db833 826
8c6db833
LP
827 assert(path);
828
b043cd0b 829 r = cg_enumerate_processes(controller, path, &f);
6f883237 830 if (r == -ENOENT)
1bcf3fc6 831 return true;
c3175a7f 832 if (r < 0)
6f883237 833 return r;
8c6db833 834
6f883237 835 r = cg_read_pid(f, &pid);
c6c18be3
LP
836 if (r < 0)
837 return r;
8c6db833 838
6f883237 839 return r == 0;
8c6db833
LP
840}
841
6f883237 842int cg_is_empty_recursive(const char *controller, const char *path) {
415fc41c 843 int r;
8c6db833 844
8c6db833
LP
845 assert(path);
846
6fd66507 847 /* The root cgroup is always populated */
57ea45e1 848 if (controller && empty_or_root(path))
efdb0237 849 return false;
6fd66507 850
c22800e4 851 r = cg_unified_controller(controller);
b4cccbc1
LP
852 if (r < 0)
853 return r;
854 if (r > 0) {
ab2c3861 855 _cleanup_free_ char *t = NULL;
8c6db833 856
efdb0237 857 /* On the unified hierarchy we can check empty state
ab2c3861 858 * via the "populated" attribute of "cgroup.events". */
8c6db833 859
ab2c3861 860 r = cg_read_event(controller, path, "populated", &t);
1bcf3fc6
ZJS
861 if (r == -ENOENT)
862 return true;
efdb0237
LP
863 if (r < 0)
864 return r;
865
866 return streq(t, "0");
867 } else {
868 _cleanup_closedir_ DIR *d = NULL;
869 char *fn;
8c6db833 870
efdb0237 871 r = cg_is_empty(controller, path);
35d2e7ec 872 if (r <= 0)
7027ff61 873 return r;
35d2e7ec 874
efdb0237
LP
875 r = cg_enumerate_subgroups(controller, path, &d);
876 if (r == -ENOENT)
1bcf3fc6 877 return true;
efdb0237
LP
878 if (r < 0)
879 return r;
35d2e7ec 880
efdb0237
LP
881 while ((r = cg_read_subgroup(d, &fn)) > 0) {
882 _cleanup_free_ char *p = NULL;
883
657ee2d8 884 p = path_join(path, fn);
efdb0237
LP
885 free(fn);
886 if (!p)
887 return -ENOMEM;
888
889 r = cg_is_empty_recursive(controller, p);
890 if (r <= 0)
891 return r;
892 }
893 if (r < 0)
894 return r;
895
896 return true;
897 }
35d2e7ec
LP
898}
899
2a8020fe
ZJS
900int cg_split_spec(const char *spec, char **ret_controller, char **ret_path) {
901 _cleanup_free_ char *controller = NULL, *path = NULL;
35d2e7ec
LP
902
903 assert(spec);
35d2e7ec
LP
904
905 if (*spec == '/') {
99be45a4 906 if (!path_is_normalized(spec))
e884315e 907 return -EINVAL;
35d2e7ec 908
2a8020fe
ZJS
909 if (ret_path) {
910 path = strdup(spec);
911 if (!path)
35d2e7ec
LP
912 return -ENOMEM;
913
2a8020fe 914 path_simplify(path, false);
8c6db833
LP
915 }
916
2a8020fe
ZJS
917 } else {
918 const char *e;
35d2e7ec 919
2a8020fe
ZJS
920 e = strchr(spec, ':');
921 if (e) {
922 controller = strndup(spec, e-spec);
923 if (!controller)
35d2e7ec 924 return -ENOMEM;
2a8020fe
ZJS
925 if (!cg_controller_is_valid(controller))
926 return -EINVAL;
35d2e7ec 927
2a8020fe
ZJS
928 if (!isempty(e + 1)) {
929 path = strdup(e+1);
930 if (!path)
931 return -ENOMEM;
35d2e7ec 932
2a8020fe
ZJS
933 if (!path_is_normalized(path) ||
934 !path_is_absolute(path))
935 return -EINVAL;
8c6db833 936
2a8020fe
ZJS
937 path_simplify(path, false);
938 }
246aa6dd 939
2a8020fe
ZJS
940 } else {
941 if (!cg_controller_is_valid(spec))
942 return -EINVAL;
35d2e7ec 943
2a8020fe
ZJS
944 if (ret_controller) {
945 controller = strdup(spec);
946 if (!controller)
947 return -ENOMEM;
948 }
baa89da4 949 }
baa89da4 950 }
5954c074 951
2a8020fe
ZJS
952 if (ret_controller)
953 *ret_controller = TAKE_PTR(controller);
954 if (ret_path)
955 *ret_path = TAKE_PTR(path);
35d2e7ec 956 return 0;
8c6db833 957}
c6c18be3 958
7027ff61 959int cg_mangle_path(const char *path, char **result) {
78edb35a
LP
960 _cleanup_free_ char *c = NULL, *p = NULL;
961 char *t;
35d2e7ec
LP
962 int r;
963
964 assert(path);
965 assert(result);
966
73e231ab 967 /* First, check if it already is a filesystem path */
7027ff61 968 if (path_startswith(path, "/sys/fs/cgroup")) {
35d2e7ec 969
b69d29ce
LP
970 t = strdup(path);
971 if (!t)
35d2e7ec
LP
972 return -ENOMEM;
973
858d36c1 974 *result = path_simplify(t, false);
35d2e7ec
LP
975 return 0;
976 }
977
73e231ab 978 /* Otherwise, treat it as cg spec */
b69d29ce
LP
979 r = cg_split_spec(path, &c, &p);
980 if (r < 0)
35d2e7ec
LP
981 return r;
982
efdb0237 983 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
35d2e7ec 984}
1f73f0f1 985
7027ff61 986int cg_get_root_path(char **path) {
9444b1f2 987 char *p, *e;
7027ff61
LP
988 int r;
989
990 assert(path);
991
9444b1f2 992 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
7027ff61
LP
993 if (r < 0)
994 return r;
995
efdb0237
LP
996 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
997 if (!e)
998 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
999 if (!e)
1000 e = endswith(p, "/system"); /* even more legacy */
9444b1f2 1001 if (e)
7027ff61
LP
1002 *e = 0;
1003
1f73f0f1
LP
1004 *path = p;
1005 return 0;
1006}
b59e2465 1007
751bc6ac
LP
1008int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1009 _cleanup_free_ char *rt = NULL;
1010 char *p;
ba1261bc
LP
1011 int r;
1012
e9174f29 1013 assert(cgroup);
751bc6ac 1014 assert(shifted);
e9174f29
LP
1015
1016 if (!root) {
1017 /* If the root was specified let's use that, otherwise
1018 * let's determine it from PID 1 */
1019
751bc6ac 1020 r = cg_get_root_path(&rt);
e9174f29
LP
1021 if (r < 0)
1022 return r;
1023
751bc6ac 1024 root = rt;
e9174f29 1025 }
ba1261bc 1026
751bc6ac 1027 p = path_startswith(cgroup, root);
efdb0237 1028 if (p && p > cgroup)
751bc6ac
LP
1029 *shifted = p - 1;
1030 else
1031 *shifted = cgroup;
1032
1033 return 0;
1034}
1035
1036int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1037 _cleanup_free_ char *raw = NULL;
1038 const char *c;
1039 int r;
1040
1041 assert(pid >= 0);
1042 assert(cgroup);
1043
1044 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
7027ff61 1045 if (r < 0)
ba1261bc 1046 return r;
ba1261bc 1047
751bc6ac
LP
1048 r = cg_shift_path(raw, root, &c);
1049 if (r < 0)
1050 return r;
ba1261bc 1051
ae2a15bc
LP
1052 if (c == raw)
1053 *cgroup = TAKE_PTR(raw);
1054 else {
751bc6ac 1055 char *n;
ba1261bc 1056
751bc6ac
LP
1057 n = strdup(c);
1058 if (!n)
ba1261bc 1059 return -ENOMEM;
ba1261bc 1060
751bc6ac
LP
1061 *cgroup = n;
1062 }
ba1261bc
LP
1063
1064 return 0;
1065}
1066
9ed794a3 1067int cg_path_decode_unit(const char *cgroup, char **unit) {
8b0849e9
LP
1068 char *c, *s;
1069 size_t n;
ef1673d1
MT
1070
1071 assert(cgroup);
6c03089c 1072 assert(unit);
ef1673d1 1073
8b0849e9
LP
1074 n = strcspn(cgroup, "/");
1075 if (n < 3)
1076 return -ENXIO;
1077
1078 c = strndupa(cgroup, n);
ae018d9b 1079 c = cg_unescape(c);
ef1673d1 1080
7410616c 1081 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
cfeaa44a 1082 return -ENXIO;
ef1673d1 1083
d7bd3de0 1084 s = strdup(c);
6c03089c
LP
1085 if (!s)
1086 return -ENOMEM;
1087
1088 *unit = s;
ef1673d1
MT
1089 return 0;
1090}
1091
8b0849e9
LP
1092static bool valid_slice_name(const char *p, size_t n) {
1093
1094 if (!p)
1095 return false;
1096
fbd0b64f 1097 if (n < STRLEN("x.slice"))
8b0849e9
LP
1098 return false;
1099
1100 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1101 char buf[n+1], *c;
1102
1103 memcpy(buf, p, n);
1104 buf[n] = 0;
1105
1106 c = cg_unescape(buf);
1107
7410616c 1108 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
8b0849e9
LP
1109 }
1110
1111 return false;
1112}
1113
/* Skips over all slice assignments: advances past leading slashes and past every leading path
 * component accepted by valid_slice_name(), and returns a pointer to the first component that is
 * not a slice (or to the terminating NUL). */
static const char *skip_slices(const char *p) {
        assert(p);

        p += strspn(p, "/");

        for (size_t len = strcspn(p, "/"); valid_slice_name(p, len); len = strcspn(p, "/")) {
                p += len;
                p += strspn(p, "/");
        }

        return p;
}
1131
8b0849e9 1132int cg_path_get_unit(const char *path, char **ret) {
6c03089c 1133 const char *e;
8b0849e9
LP
1134 char *unit;
1135 int r;
6c03089c
LP
1136
1137 assert(path);
8b0849e9 1138 assert(ret);
6c03089c 1139
9444b1f2 1140 e = skip_slices(path);
6c03089c 1141
8b0849e9
LP
1142 r = cg_path_decode_unit(e, &unit);
1143 if (r < 0)
1144 return r;
1145
1146 /* We skipped over the slices, don't accept any now */
1147 if (endswith(unit, ".slice")) {
1148 free(unit);
1149 return -ENXIO;
1150 }
1151
1152 *ret = unit;
1153 return 0;
6c03089c
LP
1154}
1155
1156int cg_pid_get_unit(pid_t pid, char **unit) {
7fd1b19b 1157 _cleanup_free_ char *cgroup = NULL;
ba1261bc 1158 int r;
ba1261bc 1159
ef1673d1
MT
1160 assert(unit);
1161
7027ff61 1162 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
ef1673d1
MT
1163 if (r < 0)
1164 return r;
1165
6c03089c
LP
1166 return cg_path_get_unit(cgroup, unit);
1167}
ef1673d1 1168
d4fffc4b
ZJS
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Leading slashes are ignored. If the first path component has the form "session-<id>.scope" and
 * <id> is accepted by session_id_valid(), a pointer to the component following it (with separating
 * slashes skipped) is returned. In every other case, including a NULL or empty input, NULL is
 * returned.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for what is left between the 8 byte prefix and the 6 byte suffix, plus NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1205
/**
 * Skip user@*.service, but require it to be there.
 *
 * Leading slashes are ignored. If the first path component has the form "user@<uid>.service" and
 * <uid> is accepted by parse_uid(), a pointer to the component following it (with separating
 * slashes skipped) is returned; otherwise, and for a NULL or empty input, NULL is returned.
 */
static const char *skip_user_manager(const char *p) {
        const char *uid_str;
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + n - 8, ".service", 8) != 0)
                return NULL;

        /* Cut out what sits between the 5 byte prefix and the 8 byte suffix. The component holds no
         * NUL byte (n comes from strcspn() above), hence exactly n - 5 - 8 bytes are copied. */
        uid_str = strndupa(p + 5, n - 5 - 8);

        /* Note that user manager services never need unescaping, since they cannot conflict with
         * the kernel's own names, hence we don't need to call cg_unescape() here. */
        if (parse_uid(uid_str, NULL) < 0)
                return NULL;

        p += n;
        p += strspn(p, "/");

        return p;
}
1243
/* Skips everything in 'path' that precedes the user-level part of the hierarchy: first any slices, then
 * either the user manager service or, failing that, the session scope. Returns NULL if neither of the
 * latter two is found. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Skip the user manager, if it's in the path now... */
        after_manager = skip_user_manager(after_slices);

        /* ... alternatively skip the user session if it is in the path */
        return after_manager ? after_manager : skip_session(after_slices);
}
32081481 1260
329ac4bc
LP
/* Extracts the user unit from a cgroup path. Returns -ENXIO if the path carries no user manager or
 * session prefix, otherwise the result of cg_path_get_unit() on the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* And from here on it looks pretty much the same as for a system unit, hence let's use the same
         * parser. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
ba1261bc 1275
ef1673d1 1276int cg_pid_get_user_unit(pid_t pid, char **unit) {
7fd1b19b 1277 _cleanup_free_ char *cgroup = NULL;
6c03089c
LP
1278 int r;
1279
1280 assert(unit);
1281
7027ff61 1282 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
6c03089c
LP
1283 if (r < 0)
1284 return r;
1285
1286 return cg_path_get_user_unit(cgroup, unit);
ba1261bc 1287}
e884315e 1288
7027ff61 1289int cg_path_get_machine_name(const char *path, char **machine) {
efdb0237
LP
1290 _cleanup_free_ char *u = NULL;
1291 const char *sl;
89f7c846 1292 int r;
374ec6ab 1293
89f7c846
LP
1294 r = cg_path_get_unit(path, &u);
1295 if (r < 0)
1296 return r;
7027ff61 1297
efdb0237 1298 sl = strjoina("/run/systemd/machines/unit:", u);
89f7c846 1299 return readlink_malloc(sl, machine);
7027ff61
LP
1300}
1301
1302int cg_pid_get_machine_name(pid_t pid, char **machine) {
7fd1b19b 1303 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1304 int r;
1305
1306 assert(machine);
1307
1308 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1309 if (r < 0)
1310 return r;
1311
1312 return cg_path_get_machine_name(cgroup, machine);
1313}
1314
1315int cg_path_get_session(const char *path, char **session) {
8b0849e9
LP
1316 _cleanup_free_ char *unit = NULL;
1317 char *start, *end;
1318 int r;
7027ff61
LP
1319
1320 assert(path);
7027ff61 1321
8b0849e9
LP
1322 r = cg_path_get_unit(path, &unit);
1323 if (r < 0)
1324 return r;
7027ff61 1325
8b0849e9
LP
1326 start = startswith(unit, "session-");
1327 if (!start)
cfeaa44a 1328 return -ENXIO;
8b0849e9
LP
1329 end = endswith(start, ".scope");
1330 if (!end)
cfeaa44a 1331 return -ENXIO;
8b0849e9
LP
1332
1333 *end = 0;
1334 if (!session_id_valid(start))
cfeaa44a 1335 return -ENXIO;
374ec6ab 1336
af08d2f9 1337 if (session) {
8b0849e9 1338 char *rr;
af08d2f9 1339
8b0849e9
LP
1340 rr = strdup(start);
1341 if (!rr)
af08d2f9
LP
1342 return -ENOMEM;
1343
8b0849e9 1344 *session = rr;
af08d2f9 1345 }
7027ff61 1346
7027ff61
LP
1347 return 0;
1348}
1349
1350int cg_pid_get_session(pid_t pid, char **session) {
7fd1b19b 1351 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1352 int r;
1353
7027ff61
LP
1354 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1355 if (r < 0)
1356 return r;
1357
1358 return cg_path_get_session(cgroup, session);
1359}
1360
ae018d9b 1361int cg_path_get_owner_uid(const char *path, uid_t *uid) {
374ec6ab 1362 _cleanup_free_ char *slice = NULL;
8b0849e9 1363 char *start, *end;
374ec6ab 1364 int r;
ae018d9b
LP
1365
1366 assert(path);
ae018d9b 1367
374ec6ab
LP
1368 r = cg_path_get_slice(path, &slice);
1369 if (r < 0)
1370 return r;
ae018d9b 1371
674eb685
LP
1372 start = startswith(slice, "user-");
1373 if (!start)
cfeaa44a 1374 return -ENXIO;
8b0849e9 1375 end = endswith(start, ".slice");
674eb685 1376 if (!end)
cfeaa44a 1377 return -ENXIO;
ae018d9b 1378
8b0849e9
LP
1379 *end = 0;
1380 if (parse_uid(start, uid) < 0)
cfeaa44a 1381 return -ENXIO;
674eb685 1382
674eb685 1383 return 0;
ae018d9b
LP
1384}
1385
1386int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1387 _cleanup_free_ char *cgroup = NULL;
1388 int r;
1389
ae018d9b
LP
1390 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1391 if (r < 0)
1392 return r;
1393
1394 return cg_path_get_owner_uid(cgroup, uid);
1395}
1396
1021b21b
LP
1397int cg_path_get_slice(const char *p, char **slice) {
1398 const char *e = NULL;
1021b21b
LP
1399
1400 assert(p);
1401 assert(slice);
1402
329ac4bc
LP
1403 /* Finds the right-most slice unit from the beginning, but
1404 * stops before we come to the first non-slice unit. */
1405
1021b21b
LP
1406 for (;;) {
1407 size_t n;
1408
1409 p += strspn(p, "/");
1410
1411 n = strcspn(p, "/");
8b0849e9 1412 if (!valid_slice_name(p, n)) {
1021b21b 1413
8b0849e9
LP
1414 if (!e) {
1415 char *s;
1021b21b 1416
e5d855d3 1417 s = strdup(SPECIAL_ROOT_SLICE);
8b0849e9
LP
1418 if (!s)
1419 return -ENOMEM;
1021b21b 1420
8b0849e9
LP
1421 *slice = s;
1422 return 0;
1423 }
1424
1425 return cg_path_decode_unit(e, slice);
1021b21b
LP
1426 }
1427
1428 e = p;
1021b21b
LP
1429 p += n;
1430 }
1431}
1432
1433int cg_pid_get_slice(pid_t pid, char **slice) {
1434 _cleanup_free_ char *cgroup = NULL;
1435 int r;
1436
1437 assert(slice);
1438
1439 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1440 if (r < 0)
1441 return r;
1442
1443 return cg_path_get_slice(cgroup, slice);
1444}
1445
329ac4bc
LP
/* Extracts the user slice from a cgroup path. Returns -ENXIO if the path carries no user manager or
 * session prefix, otherwise the result of cg_path_get_slice() on the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* And now it looks pretty much the same as for a system
         * slice, so let's just use the same parser from here on. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1459
1460int cg_pid_get_user_slice(pid_t pid, char **slice) {
1461 _cleanup_free_ char *cgroup = NULL;
1462 int r;
1463
1464 assert(slice);
1465
1466 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1467 if (r < 0)
1468 return r;
1469
1470 return cg_path_get_user_slice(cgroup, slice);
1471}
1472
ae018d9b
LP
1473char *cg_escape(const char *p) {
1474 bool need_prefix = false;
1475
1476 /* This implements very minimal escaping for names to be used
1477 * as file names in the cgroup tree: any name which might
1478 * conflict with a kernel name or is prefixed with '_' is
1479 * prefixed with a '_'. That way, when reading cgroup names it
1480 * is sufficient to remove a single prefixing underscore if
1481 * there is one. */
1482
1483 /* The return value of this function (unlike cg_unescape())
1484 * needs free()! */
1485
4c701096 1486 if (IN_SET(p[0], 0, '_', '.') ||
0cbd293e 1487 STR_IN_SET(p, "notify_on_release", "release_agent", "tasks") ||
efdb0237 1488 startswith(p, "cgroup."))
ae018d9b
LP
1489 need_prefix = true;
1490 else {
1491 const char *dot;
1492
1493 dot = strrchr(p, '.');
1494 if (dot) {
efdb0237
LP
1495 CGroupController c;
1496 size_t l = dot - p;
ae018d9b 1497
efdb0237
LP
1498 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1499 const char *n;
1500
1501 n = cgroup_controller_to_string(c);
ae018d9b 1502
efdb0237
LP
1503 if (l != strlen(n))
1504 continue;
ae018d9b 1505
efdb0237
LP
1506 if (memcmp(p, n, l) != 0)
1507 continue;
1508
1509 need_prefix = true;
1510 break;
ae018d9b
LP
1511 }
1512 }
1513 }
1514
1515 if (need_prefix)
b910cc72 1516 return strjoin("_", p);
efdb0237
LP
1517
1518 return strdup(p);
ae018d9b
LP
1519}
1520
char *cg_unescape(const char *p) {
        assert(p);

        /* The return value of this function (unlike cg_escape())
         * doesn't need free()! It points into the string passed in,
         * just past a single leading underscore if there is one. */

        return (char*) p + (p[0] == '_' ? 1 : 0);
}
78edb35a
LP
1532
1533#define CONTROLLER_VALID \
4b549144 1534 DIGITS LETTERS \
78edb35a
LP
1535 "_"
1536
185a0874 1537bool cg_controller_is_valid(const char *p) {
78edb35a
LP
1538 const char *t, *s;
1539
1540 if (!p)
1541 return false;
1542
b6629c4b
TH
1543 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1544 return true;
1545
185a0874
DJL
1546 s = startswith(p, "name=");
1547 if (s)
1548 p = s;
78edb35a 1549
4c701096 1550 if (IN_SET(*p, 0, '_'))
78edb35a
LP
1551 return false;
1552
1553 for (t = p; *t; t++)
1554 if (!strchr(CONTROLLER_VALID, *t))
1555 return false;
1556
1557 if (t - p > FILENAME_MAX)
1558 return false;
1559
1560 return true;
1561}
a016b922
LP
1562
1563int cg_slice_to_path(const char *unit, char **ret) {
1564 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1565 const char *dash;
7410616c 1566 int r;
a016b922
LP
1567
1568 assert(unit);
1569 assert(ret);
1570
e5d855d3 1571 if (streq(unit, SPECIAL_ROOT_SLICE)) {
c96cc582
LP
1572 char *x;
1573
1574 x = strdup("");
1575 if (!x)
1576 return -ENOMEM;
1577 *ret = x;
1578 return 0;
1579 }
1580
7410616c 1581 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
a016b922
LP
1582 return -EINVAL;
1583
1584 if (!endswith(unit, ".slice"))
1585 return -EINVAL;
1586
7410616c
LP
1587 r = unit_name_to_prefix(unit, &p);
1588 if (r < 0)
1589 return r;
a016b922
LP
1590
1591 dash = strchr(p, '-');
e66e5b61
LP
1592
1593 /* Don't allow initial dashes */
1594 if (dash == p)
1595 return -EINVAL;
1596
a016b922
LP
1597 while (dash) {
1598 _cleanup_free_ char *escaped = NULL;
1599 char n[dash - p + sizeof(".slice")];
1600
989290db 1601#if HAS_FEATURE_MEMORY_SANITIZER
1c56d501 1602 /* msan doesn't instrument stpncpy, so it thinks
5238e957 1603 * n is later used uninitialized:
1c56d501
ZJS
1604 * https://github.com/google/sanitizers/issues/926
1605 */
1606 zero(n);
1607#endif
1608
e66e5b61 1609 /* Don't allow trailing or double dashes */
4c701096 1610 if (IN_SET(dash[1], 0, '-'))
c96cc582 1611 return -EINVAL;
a016b922 1612
c96cc582 1613 strcpy(stpncpy(n, p, dash - p), ".slice");
7410616c 1614 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
a016b922
LP
1615 return -EINVAL;
1616
1617 escaped = cg_escape(n);
1618 if (!escaped)
1619 return -ENOMEM;
1620
1621 if (!strextend(&s, escaped, "/", NULL))
1622 return -ENOMEM;
1623
1624 dash = strchr(dash+1, '-');
1625 }
1626
1627 e = cg_escape(unit);
1628 if (!e)
1629 return -ENOMEM;
1630
1631 if (!strextend(&s, e, NULL))
1632 return -ENOMEM;
1633
ae2a15bc 1634 *ret = TAKE_PTR(s);
a016b922
LP
1635
1636 return 0;
1637}
4ad49000
LP
1638
1639int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1640 _cleanup_free_ char *p = NULL;
1641 int r;
1642
1643 r = cg_get_path(controller, path, attribute, &p);
1644 if (r < 0)
1645 return r;
1646
604028de 1647 return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
4ad49000
LP
1648}
1649
934277fe
LP
1650int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1651 _cleanup_free_ char *p = NULL;
1652 int r;
1653
1654 r = cg_get_path(controller, path, attribute, &p);
1655 if (r < 0)
1656 return r;
1657
1658 return read_one_line_file(p, ret);
1659}
1660
613328c3
AZ
1661int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret) {
1662 _cleanup_free_ char *value = NULL;
1663 uint64_t v;
1664 int r;
1665
1666 assert(ret);
1667
1668 r = cg_get_attribute(controller, path, attribute, &value);
1669 if (r == -ENOENT)
1670 return -ENODATA;
1671 if (r < 0)
1672 return r;
1673
1674 if (streq(value, "max")) {
1675 *ret = CGROUP_LIMIT_MAX;
1676 return 0;
1677 }
1678
1679 r = safe_atou64(value, &v);
1680 if (r < 0)
1681 return r;
1682
1683 *ret = v;
1684 return 0;
1685}
1686
25a1f04c 1687int cg_get_keyed_attribute_full(
b734a4ff
LP
1688 const char *controller,
1689 const char *path,
1690 const char *attribute,
1691 char **keys,
25a1f04c
MS
1692 char **ret_values,
1693 CGroupKeyMode mode) {
66ebf6c0 1694
b734a4ff 1695 _cleanup_free_ char *filename = NULL, *contents = NULL;
b734a4ff 1696 const char *p;
9177fa9f 1697 size_t n, i, n_done = 0;
b734a4ff
LP
1698 char **v;
1699 int r;
1700
4e1dfa45 1701 /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
b734a4ff
LP
1702 * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
1703 * entries as 'keys'. On success each entry will be set to the value of the matching key.
1704 *
1705 * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
66ebf6c0
TH
1706
1707 r = cg_get_path(controller, path, attribute, &filename);
1708 if (r < 0)
1709 return r;
1710
b734a4ff 1711 r = read_full_file(filename, &contents, NULL);
66ebf6c0
TH
1712 if (r < 0)
1713 return r;
1714
b734a4ff
LP
1715 n = strv_length(keys);
1716 if (n == 0) /* No keys to retrieve? That's easy, we are done then */
1717 return 0;
66ebf6c0 1718
b734a4ff
LP
1719 /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
1720 v = newa0(char*, n);
66ebf6c0 1721
b734a4ff
LP
1722 for (p = contents; *p;) {
1723 const char *w = NULL;
b734a4ff 1724
9177fa9f
ZJS
1725 for (i = 0; i < n; i++)
1726 if (!v[i]) {
b734a4ff
LP
1727 w = first_word(p, keys[i]);
1728 if (w)
1729 break;
66ebf6c0 1730 }
66ebf6c0 1731
b734a4ff 1732 if (w) {
b734a4ff
LP
1733 size_t l;
1734
1735 l = strcspn(w, NEWLINE);
9177fa9f
ZJS
1736 v[i] = strndup(w, l);
1737 if (!v[i]) {
b734a4ff
LP
1738 r = -ENOMEM;
1739 goto fail;
66ebf6c0 1740 }
b734a4ff 1741
b734a4ff 1742 n_done++;
b734a4ff
LP
1743 if (n_done >= n)
1744 goto done;
1745
1746 p = w + l;
9177fa9f 1747 } else
b734a4ff 1748 p += strcspn(p, NEWLINE);
b734a4ff
LP
1749
1750 p += strspn(p, NEWLINE);
66ebf6c0
TH
1751 }
1752
25a1f04c
MS
1753 if (mode & CG_KEY_MODE_GRACEFUL)
1754 goto done;
1755 else
1756 r = -ENXIO;
b734a4ff
LP
1757
1758fail:
1759 for (i = 0; i < n; i++)
1760 free(v[i]);
1761
1762 return r;
1763
1764done:
1765 memcpy(ret_values, v, sizeof(char*) * n);
25a1f04c
MS
1766 if (mode & CG_KEY_MODE_GRACEFUL)
1767 return n_done;
1768
66ebf6c0 1769 return 0;
4ad49000
LP
1770}
1771
aae7e17f 1772int cg_mask_to_string(CGroupMask mask, char **ret) {
ec635a2d
LP
1773 _cleanup_free_ char *s = NULL;
1774 size_t n = 0, allocated = 0;
1775 bool space = false;
aae7e17f 1776 CGroupController c;
aae7e17f
FB
1777
1778 assert(ret);
1779
1780 if (mask == 0) {
1781 *ret = NULL;
1782 return 0;
1783 }
1784
1785 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
ec635a2d
LP
1786 const char *k;
1787 size_t l;
aae7e17f 1788
f99850a0 1789 if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
aae7e17f
FB
1790 continue;
1791
ec635a2d
LP
1792 k = cgroup_controller_to_string(c);
1793 l = strlen(k);
1794
1795 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
1796 return -ENOMEM;
1797
1798 if (space)
1799 s[n] = ' ';
1800 memcpy(s + n + space, k, l);
1801 n += space + l;
1802
1803 space = true;
aae7e17f
FB
1804 }
1805
ec635a2d 1806 assert(s);
aae7e17f 1807
ec635a2d 1808 s[n] = 0;
ae2a15bc 1809 *ret = TAKE_PTR(s);
ec635a2d 1810
aae7e17f
FB
1811 return 0;
1812}
1813
38a90d45
LP
1814int cg_mask_from_string(const char *value, CGroupMask *ret) {
1815 CGroupMask m = 0;
1816
1817 assert(ret);
aae7e17f
FB
1818 assert(value);
1819
1820 for (;;) {
1821 _cleanup_free_ char *n = NULL;
1822 CGroupController v;
1823 int r;
1824
1825 r = extract_first_word(&value, &n, NULL, 0);
1826 if (r < 0)
1827 return r;
1828 if (r == 0)
1829 break;
1830
1831 v = cgroup_controller_from_string(n);
1832 if (v < 0)
1833 continue;
1834
38a90d45 1835 m |= CGROUP_CONTROLLER_TO_MASK(v);
aae7e17f 1836 }
38a90d45
LP
1837
1838 *ret = m;
aae7e17f
FB
1839 return 0;
1840}
1841
efdb0237 1842int cg_mask_supported(CGroupMask *ret) {
38a90d45 1843 CGroupMask mask;
415fc41c 1844 int r;
efdb0237 1845
67558d15
LP
1846 /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense of and that
1847 * are actually accessible. Only covers real controllers, i.e. not the CGROUP_CONTROLLER_BPF_xyz
1848 * pseudo-controllers. */
4ad49000 1849
b4cccbc1
LP
1850 r = cg_all_unified();
1851 if (r < 0)
1852 return r;
1853 if (r > 0) {
5f4c5fef 1854 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
efdb0237
LP
1855
1856 /* In the unified hierarchy we can read the supported
1857 * and accessible controllers from a the top-level
1858 * cgroup attribute */
1859
5f4c5fef
LP
1860 r = cg_get_root_path(&root);
1861 if (r < 0)
1862 return r;
1863
1864 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
1865 if (r < 0)
1866 return r;
1867
1868 r = read_one_line_file(path, &controllers);
efdb0237
LP
1869 if (r < 0)
1870 return r;
4ad49000 1871
aae7e17f
FB
1872 r = cg_mask_from_string(controllers, &mask);
1873 if (r < 0)
1874 return r;
efdb0237 1875
1fbbb526 1876 /* Mask controllers that are not supported in unified hierarchy. */
03afd780 1877 mask &= CGROUP_MASK_V2;
efdb0237
LP
1878
1879 } else {
1880 CGroupController c;
1881
03afd780 1882 /* In the legacy hierarchy, we check which hierarchies are mounted. */
efdb0237 1883
38a90d45 1884 mask = 0;
efdb0237 1885 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
03afd780 1886 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
efdb0237
LP
1887 const char *n;
1888
03afd780
LP
1889 if (!FLAGS_SET(CGROUP_MASK_V1, bit))
1890 continue;
1891
efdb0237
LP
1892 n = cgroup_controller_to_string(c);
1893 if (controller_is_accessible(n) >= 0)
03afd780 1894 mask |= bit;
efdb0237 1895 }
4ad49000
LP
1896 }
1897
efdb0237
LP
1898 *ret = mask;
1899 return 0;
4ad49000 1900}
b12afc8c 1901
6925a0de
LP
1902int cg_kernel_controllers(Set **ret) {
1903 _cleanup_set_free_free_ Set *controllers = NULL;
b12afc8c 1904 _cleanup_fclose_ FILE *f = NULL;
b12afc8c
LP
1905 int r;
1906
6925a0de 1907 assert(ret);
b12afc8c 1908
f09e86bc
LS
1909 /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
1910 * and controllers that aren't currently accessible (because not mounted). This does not include "name="
1911 * pseudo-controllers. */
e155a0aa 1912
6925a0de
LP
1913 controllers = set_new(&string_hash_ops);
1914 if (!controllers)
1915 return -ENOMEM;
1916
fdeea3f4
ZJS
1917 r = fopen_unlocked("/proc/cgroups", "re", &f);
1918 if (r == -ENOENT) {
1919 *ret = NULL;
1920 return 0;
b12afc8c 1921 }
fdeea3f4
ZJS
1922 if (r < 0)
1923 return r;
35bbbf85 1924
b12afc8c 1925 /* Ignore the header line */
2351e44d 1926 (void) read_line(f, (size_t) -1, NULL);
b12afc8c
LP
1927
1928 for (;;) {
1929 char *controller;
1930 int enabled = 0;
1931
1932 errno = 0;
1933 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
1934
1935 if (feof(f))
1936 break;
1937
66855de7
LP
1938 if (ferror(f))
1939 return errno_or_else(EIO);
b12afc8c
LP
1940
1941 return -EBADMSG;
1942 }
1943
1944 if (!enabled) {
1945 free(controller);
1946 continue;
1947 }
1948
efdb0237 1949 if (!cg_controller_is_valid(controller)) {
b12afc8c
LP
1950 free(controller);
1951 return -EBADMSG;
1952 }
1953
1954 r = set_consume(controllers, controller);
1955 if (r < 0)
1956 return r;
1957 }
1958
1cc6c93a 1959 *ret = TAKE_PTR(controllers);
6925a0de 1960
b12afc8c
LP
1961 return 0;
1962}
efdb0237 1963
d4d99bc6
ZJS
1964/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on
1965 * /sys/fs/cgroup/systemd. This unfortunately broke other tools (such as docker) which expected the v1
1966 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts v2 on
1967 * /sys/fs/cgroup/unified and maintains "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility
1968 * with other tools.
f08e9287 1969 *
d4d99bc6
ZJS
1970 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep
1971 * cgroup v2 process management but disable the compat dual layout, we return true on
1972 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and false on cg_hybrid_unified().
f08e9287
TH
1973 */
1974static thread_local bool unified_systemd_v232;
1975
d4d99bc6
ZJS
1976int cg_unified_cached(bool flush) {
1977 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
efdb0237 1978
efdb0237
LP
1979 struct statfs fs;
1980
1981 /* Checks if we support the unified hierarchy. Returns an
1982 * error when the cgroup hierarchies aren't mounted yet or we
1983 * have any other trouble determining if the unified hierarchy
1984 * is supported. */
1985
d4d99bc6
ZJS
1986 if (flush)
1987 unified_cache = CGROUP_UNIFIED_UNKNOWN;
1988 else if (unified_cache >= CGROUP_UNIFIED_NONE)
1989 return unified_cache;
efdb0237
LP
1990
1991 if (statfs("/sys/fs/cgroup/", &fs) < 0)
c028bed1 1992 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
efdb0237 1993
9aa21133
ZJS
1994 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
1995 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
5da38d07 1996 unified_cache = CGROUP_UNIFIED_ALL;
9aa21133 1997 } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2977724b 1998 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
f08e9287 1999 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
9aa21133 2000 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2977724b 2001 unified_cache = CGROUP_UNIFIED_SYSTEMD;
f08e9287 2002 unified_systemd_v232 = false;
f08e9287 2003 } else {
2977724b 2004 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
9aa21133 2005 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
5535d8f7
EV
2006
2007 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2008 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2009 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2010 unified_systemd_v232 = true;
2011 } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2012 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2013 unified_cache = CGROUP_UNIFIED_NONE;
2014 } else {
2015 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
9aa21133 2016 (unsigned long long) fs.f_type);
5535d8f7 2017 unified_cache = CGROUP_UNIFIED_NONE;
9aa21133 2018 }
2977724b 2019 }
0bc5f001
DS
2020 } else if (F_TYPE_EQUAL(fs.f_type, SYSFS_MAGIC)) {
2021 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
2022 "No filesystem is currently mounted on /sys/fs/cgroup.");
baaa35ad
ZJS
2023 } else
2024 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
2025 "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2026 (unsigned long long)fs.f_type);
efdb0237 2027
d4d99bc6 2028 return unified_cache;
5da38d07
TH
2029}
2030
c22800e4 2031int cg_unified_controller(const char *controller) {
b4cccbc1 2032 int r;
5da38d07 2033
d4d99bc6 2034 r = cg_unified_cached(false);
b4cccbc1
LP
2035 if (r < 0)
2036 return r;
5da38d07 2037
d4d99bc6 2038 if (r == CGROUP_UNIFIED_NONE)
fc9ae717
LP
2039 return false;
2040
d4d99bc6 2041 if (r >= CGROUP_UNIFIED_ALL)
fc9ae717
LP
2042 return true;
2043
2044 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
5da38d07
TH
2045}
2046
b4cccbc1 2047int cg_all_unified(void) {
4bb652ac
LP
2048 int r;
2049
d4d99bc6 2050 r = cg_unified_cached(false);
4bb652ac
LP
2051 if (r < 0)
2052 return r;
2053
d4d99bc6 2054 return r >= CGROUP_UNIFIED_ALL;
efdb0237
LP
2055}
2056
b4cccbc1
LP
2057int cg_hybrid_unified(void) {
2058 int r;
2977724b 2059
d4d99bc6 2060 r = cg_unified_cached(false);
b4cccbc1
LP
2061 if (r < 0)
2062 return r;
2977724b 2063
d4d99bc6 2064 return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
13c31542
TH
2065}
2066
9be57249
TH
2067const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2068 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2069 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
ac06a0cf
TH
2070 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2071 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
9be57249
TH
2072};
2073
2074static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2075 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2076 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
ac06a0cf
TH
2077 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2078 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
9be57249
TH
2079};
2080
2081DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2082
f0bef277
EV
2083bool is_cgroup_fs(const struct statfs *s) {
2084 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2085 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2086}
2087
/* Returns true if the file descriptor refers to something on a cgroup (v1) or cgroup2 file system.
 *
 * If fstatfs() fails the answer is false: the function returns bool, so the negative errno value that
 * used to be returned here was converted to true, i.e. a failed check was reported as a match. */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2096
b82f71c7 2097static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
efdb0237
LP
2098 [CGROUP_CONTROLLER_CPU] = "cpu",
2099 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
047f5d63 2100 [CGROUP_CONTROLLER_CPUSET] = "cpuset",
13c31542 2101 [CGROUP_CONTROLLER_IO] = "io",
efdb0237
LP
2102 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2103 [CGROUP_CONTROLLER_MEMORY] = "memory",
3905f127 2104 [CGROUP_CONTROLLER_DEVICES] = "devices",
03a7b521 2105 [CGROUP_CONTROLLER_PIDS] = "pids",
17f14955 2106 [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
084c7007 2107 [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
efdb0237
LP
2108};
2109
2110DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
f98c2585
CD
2111
2112CGroupMask get_cpu_accounting_mask(void) {
2113 static CGroupMask needed_mask = (CGroupMask) -1;
2114
2115 /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
2116 * provided externally from the CPU controller, which means we don't
2117 * need to enable the CPU controller just to get metrics. This is good,
2118 * because enabling the CPU controller comes at a minor performance
2119 * hit, especially when it's propagated deep into large hierarchies.
2120 * There's also no separate CPU accounting controller available within
2121 * a unified hierarchy.
2122 *
2123 * This combination of factors results in the desired cgroup mask to
2124 * enable for CPU accounting varying as follows:
2125 *
2126 * ╔═════════════════════╤═════════════════════╗
2127 * ║ Linux ≥4.15 │ Linux <4.15 ║
2128 * ╔═══════════════╬═════════════════════╪═════════════════════╣
2129 * ║ Unified ║ nothing │ CGROUP_MASK_CPU ║
2130 * ╟───────────────╫─────────────────────┼─────────────────────╢
2131 * ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
2132 * ╚═══════════════╩═════════════════════╧═════════════════════╝
2133 *
2134 * We check kernel version here instead of manually checking whether
2135 * cpu.stat is present for every cgroup, as that check in itself would
2136 * already be fairly expensive.
2137 *
2138 * Kernels where this patch has been backported will therefore have the
2139 * CPU controller enabled unnecessarily. This is more expensive than
2140 * necessary, but harmless. ☺️
2141 */
2142
2143 if (needed_mask == (CGroupMask) -1) {
2144 if (cg_all_unified()) {
2145 struct utsname u;
2146 assert_se(uname(&u) >= 0);
2147
2148 if (str_verscmp(u.release, "4.15") < 0)
2149 needed_mask = CGROUP_MASK_CPU;
2150 else
2151 needed_mask = 0;
2152 } else
2153 needed_mask = CGROUP_MASK_CPUACCT;
2154 }
2155
2156 return needed_mask;
2157}
2158
2159bool cpu_accounting_is_cheap(void) {
2160 return get_cpu_accounting_mask() == 0;
2161}