]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/cgroup-util.c
Merge pull request #11827 from keszybz/pkgconfig-variables
[thirdparty/systemd.git] / src / basic / cgroup-util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
8c6db833 2
84ac7bea 3#include <dirent.h>
8c6db833 4#include <errno.h>
84ac7bea 5#include <ftw.h>
11c3a366 6#include <limits.h>
8c6db833 7#include <signal.h>
11c3a366 8#include <stddef.h>
35bbbf85 9#include <stdio_ext.h>
8c6db833 10#include <stdlib.h>
84ac7bea 11#include <string.h>
672c48cc 12#include <sys/stat.h>
11c3a366 13#include <sys/statfs.h>
672c48cc 14#include <sys/types.h>
f98c2585 15#include <sys/utsname.h>
4b58153d 16#include <sys/xattr.h>
84ac7bea 17#include <unistd.h>
8c6db833 18
b5efdb8a 19#include "alloc-util.h"
3ffd4af2 20#include "cgroup-util.h"
93cc7779 21#include "def.h"
a0956174 22#include "dirent-util.h"
84ac7bea 23#include "extract-word.h"
3ffd4af2 24#include "fd-util.h"
84ac7bea 25#include "fileio.h"
f97b34a6 26#include "format-util.h"
f4f15635 27#include "fs-util.h"
93cc7779 28#include "log.h"
84ac7bea
LP
29#include "login-util.h"
30#include "macro.h"
93cc7779 31#include "missing.h"
84ac7bea 32#include "mkdir.h"
6bedfcbb 33#include "parse-util.h"
9eb977db 34#include "path-util.h"
872a590e 35#include "proc-cmdline.h"
84ac7bea
LP
36#include "process-util.h"
37#include "set.h"
9444b1f2 38#include "special.h"
872a590e 39#include "stat-util.h"
d054f0a4 40#include "stdio-util.h"
8b43440b 41#include "string-table.h"
07630cea 42#include "string-util.h"
aae7e17f 43#include "strv.h"
84ac7bea 44#include "unit-name.h"
b1d4f8e1 45#include "user-util.h"
8c6db833 46
c6c18be3 47int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
7027ff61 48 _cleanup_free_ char *fs = NULL;
c6c18be3 49 FILE *f;
7027ff61 50 int r;
c6c18be3 51
c6c18be3
LP
52 assert(_f);
53
c3175a7f
LP
54 r = cg_get_path(controller, path, "cgroup.procs", &fs);
55 if (r < 0)
c6c18be3
LP
56 return r;
57
58 f = fopen(fs, "re");
c6c18be3
LP
59 if (!f)
60 return -errno;
61
62 *_f = f;
63 return 0;
64}
65
c6c18be3
LP
/* Reads the next PID from a stream previously opened on a "cgroup.procs"-style file.
 *
 * Returns 1 and stores the PID in *_pid when one was read, 0 on end of file, and a negative errno-style
 * value on failure. Input that does not parse as a number, is zero, or does not fit into a positive pid_t
 * yields -EIO; *_pid is left untouched in all non-success cases.
 *
 * Note that cgroup.procs might contain duplicates! See cgroups.txt for details. */
int cg_read_pid(FILE *f, pid_t *_pid) {
        unsigned long ul;
        pid_t pid;

        assert(f);
        assert(_pid);

        errno = 0;
        if (fscanf(f, "%lu", &ul) != 1) {

                if (feof(f))
                        return 0;

                return errno > 0 ? -errno : -EIO;
        }

        if (ul == 0)
                return -EIO;

        /* Refuse values that would be truncated or turn negative when narrowed to pid_t, rather than
         * handing a bogus PID to the caller. */
        pid = (pid_t) ul;
        if (pid <= 0 || (unsigned long) pid != ul)
                return -EIO;

        *_pid = pid;
        return 1;
}
90
8b238b13
LP
91int cg_read_event(
92 const char *controller,
93 const char *path,
94 const char *event,
95 char **val) {
96
ab2c3861
TH
97 _cleanup_free_ char *events = NULL, *content = NULL;
98 char *p, *line;
99 int r;
100
101 r = cg_get_path(controller, path, "cgroup.events", &events);
102 if (r < 0)
103 return r;
104
105 r = read_full_file(events, &content, NULL);
106 if (r < 0)
107 return r;
108
109 p = content;
110 while ((line = strsep(&p, "\n"))) {
111 char *key;
112
113 key = strsep(&line, " ");
114 if (!key || !line)
115 return -EINVAL;
116
117 if (strcmp(key, event))
118 continue;
119
120 *val = strdup(line);
121 return 0;
122 }
123
124 return -ENOENT;
125}
126
3228995c
CB
127bool cg_ns_supported(void) {
128 static thread_local int enabled = -1;
129
130 if (enabled >= 0)
131 return enabled;
132
0887fa71
LP
133 if (access("/proc/self/ns/cgroup", F_OK) < 0) {
134 if (errno != ENOENT)
135 log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
136 enabled = false;
137 } else
138 enabled = true;
3228995c
CB
139
140 return enabled;
141}
142
35d2e7ec 143int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
7027ff61 144 _cleanup_free_ char *fs = NULL;
35d2e7ec
LP
145 int r;
146 DIR *d;
147
35d2e7ec
LP
148 assert(_d);
149
150 /* This is not recursive! */
151
c3175a7f
LP
152 r = cg_get_path(controller, path, NULL, &fs);
153 if (r < 0)
35d2e7ec
LP
154 return r;
155
156 d = opendir(fs);
35d2e7ec
LP
157 if (!d)
158 return -errno;
159
160 *_d = d;
161 return 0;
162}
163
164int cg_read_subgroup(DIR *d, char **fn) {
165 struct dirent *de;
166
167 assert(d);
7027ff61 168 assert(fn);
35d2e7ec 169
f01327ad 170 FOREACH_DIRENT_ALL(de, d, return -errno) {
35d2e7ec
LP
171 char *b;
172
173 if (de->d_type != DT_DIR)
174 continue;
175
49bfc877 176 if (dot_or_dot_dot(de->d_name))
35d2e7ec
LP
177 continue;
178
7027ff61
LP
179 b = strdup(de->d_name);
180 if (!b)
35d2e7ec
LP
181 return -ENOMEM;
182
183 *fn = b;
184 return 1;
185 }
186
35d2e7ec
LP
187 return 0;
188}
189
4ad49000 190int cg_rmdir(const char *controller, const char *path) {
7027ff61 191 _cleanup_free_ char *p = NULL;
35d2e7ec
LP
192 int r;
193
ad293f5a
LP
194 r = cg_get_path(controller, path, NULL, &p);
195 if (r < 0)
35d2e7ec
LP
196 return r;
197
198 r = rmdir(p);
7027ff61
LP
199 if (r < 0 && errno != ENOENT)
200 return -errno;
35d2e7ec 201
b4cccbc1 202 r = cg_hybrid_unified();
f20db199 203 if (r <= 0)
b4cccbc1 204 return r;
b4cccbc1
LP
205
206 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
207 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
208 if (r < 0)
209 log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
210 }
211
7027ff61 212 return 0;
35d2e7ec
LP
213}
214
1d98fef1
LP
215int cg_kill(
216 const char *controller,
217 const char *path,
218 int sig,
219 CGroupFlags flags,
220 Set *s,
221 cg_kill_log_func_t log_kill,
222 void *userdata) {
223
7027ff61 224 _cleanup_set_free_ Set *allocated_set = NULL;
35d2e7ec 225 bool done = false;
c53d2d54 226 int r, ret = 0, ret_log_kill = 0;
35d2e7ec 227 pid_t my_pid;
8c6db833 228
8c6db833
LP
229 assert(sig >= 0);
230
0d5b4810
LP
231 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
232 * SIGCONT on SIGKILL. */
233 if (IN_SET(sig, SIGCONT, SIGKILL))
234 flags &= ~CGROUP_SIGCONT;
235
8c6db833
LP
236 /* This goes through the tasks list and kills them all. This
237 * is repeated until no further processes are added to the
238 * tasks list, to properly handle forking processes */
239
7027ff61 240 if (!s) {
d5099efc 241 s = allocated_set = set_new(NULL);
7027ff61 242 if (!s)
ca949c9d 243 return -ENOMEM;
7027ff61 244 }
8c6db833 245
df0ff127 246 my_pid = getpid_cached();
8c6db833
LP
247
248 do {
7027ff61 249 _cleanup_fclose_ FILE *f = NULL;
0b172489 250 pid_t pid = 0;
8c6db833
LP
251 done = true;
252
7027ff61
LP
253 r = cg_enumerate_processes(controller, path, &f);
254 if (r < 0) {
4c633005 255 if (ret >= 0 && r != -ENOENT)
7027ff61 256 return r;
35d2e7ec 257
7027ff61 258 return ret;
35d2e7ec 259 }
c6c18be3
LP
260
261 while ((r = cg_read_pid(f, &pid)) > 0) {
8c6db833 262
1d98fef1 263 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
c6c18be3 264 continue;
8c6db833 265
fea72cc0 266 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
c6c18be3 267 continue;
8c6db833 268
1d98fef1 269 if (log_kill)
c53d2d54 270 ret_log_kill = log_kill(pid, sig, userdata);
1d98fef1 271
8c6db833
LP
272 /* If we haven't killed this process yet, kill
273 * it */
4c633005
LP
274 if (kill(pid, sig) < 0) {
275 if (ret >= 0 && errno != ESRCH)
8c6db833 276 ret = -errno;
6e8314c4 277 } else {
1d98fef1 278 if (flags & CGROUP_SIGCONT)
e155a0aa 279 (void) kill(pid, SIGCONT);
430c18ed 280
c53d2d54
DB
281 if (ret == 0) {
282 if (log_kill)
283 ret = ret_log_kill;
284 else
285 ret = 1;
286 }
430c18ed 287 }
8c6db833 288
8c6db833
LP
289 done = false;
290
fea72cc0 291 r = set_put(s, PID_TO_PTR(pid));
7027ff61 292 if (r < 0) {
35d2e7ec 293 if (ret >= 0)
7027ff61 294 return r;
35d2e7ec 295
7027ff61 296 return ret;
35d2e7ec
LP
297 }
298 }
299
300 if (r < 0) {
301 if (ret >= 0)
7027ff61 302 return r;
35d2e7ec 303
7027ff61 304 return ret;
8c6db833
LP
305 }
306
8c6db833
LP
307 /* To avoid racing against processes which fork
308 * quicker than we can kill them we repeat this until
309 * no new pids need to be killed. */
310
35d2e7ec 311 } while (!done);
8c6db833 312
35d2e7ec 313 return ret;
8c6db833
LP
314}
315
1d98fef1
LP
316int cg_kill_recursive(
317 const char *controller,
318 const char *path,
319 int sig,
320 CGroupFlags flags,
321 Set *s,
322 cg_kill_log_func_t log_kill,
323 void *userdata) {
324
7027ff61
LP
325 _cleanup_set_free_ Set *allocated_set = NULL;
326 _cleanup_closedir_ DIR *d = NULL;
e155a0aa 327 int r, ret;
35d2e7ec 328 char *fn;
8c6db833
LP
329
330 assert(path);
8c6db833
LP
331 assert(sig >= 0);
332
7027ff61 333 if (!s) {
d5099efc 334 s = allocated_set = set_new(NULL);
7027ff61 335 if (!s)
ca949c9d 336 return -ENOMEM;
7027ff61 337 }
ca949c9d 338
1d98fef1 339 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
8c6db833 340
7027ff61
LP
341 r = cg_enumerate_subgroups(controller, path, &d);
342 if (r < 0) {
4c633005 343 if (ret >= 0 && r != -ENOENT)
7027ff61 344 return r;
8c6db833 345
7027ff61 346 return ret;
35d2e7ec 347 }
8c6db833 348
35d2e7ec 349 while ((r = cg_read_subgroup(d, &fn)) > 0) {
7027ff61 350 _cleanup_free_ char *p = NULL;
8c6db833 351
605405c6 352 p = strjoin(path, "/", fn);
35d2e7ec 353 free(fn);
7027ff61
LP
354 if (!p)
355 return -ENOMEM;
8c6db833 356
1d98fef1 357 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
e155a0aa 358 if (r != 0 && ret >= 0)
35d2e7ec 359 ret = r;
8c6db833 360 }
7027ff61 361 if (ret >= 0 && r < 0)
35d2e7ec
LP
362 ret = r;
363
1d98fef1 364 if (flags & CGROUP_REMOVE) {
4ad49000 365 r = cg_rmdir(controller, path);
4c701096 366 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
7027ff61
LP
367 return r;
368 }
ca949c9d 369
8c6db833
LP
370 return ret;
371}
372
1d98fef1
LP
373int cg_migrate(
374 const char *cfrom,
375 const char *pfrom,
376 const char *cto,
377 const char *pto,
378 CGroupFlags flags) {
379
35d2e7ec 380 bool done = false;
246aa6dd 381 _cleanup_set_free_ Set *s = NULL;
8c6db833
LP
382 int r, ret = 0;
383 pid_t my_pid;
384
246aa6dd
LP
385 assert(cfrom);
386 assert(pfrom);
387 assert(cto);
388 assert(pto);
8c6db833 389
d5099efc 390 s = set_new(NULL);
246aa6dd 391 if (!s)
35d2e7ec
LP
392 return -ENOMEM;
393
df0ff127 394 my_pid = getpid_cached();
8c6db833
LP
395
396 do {
7027ff61 397 _cleanup_fclose_ FILE *f = NULL;
0b172489 398 pid_t pid = 0;
8c6db833
LP
399 done = true;
400
b043cd0b 401 r = cg_enumerate_processes(cfrom, pfrom, &f);
246aa6dd 402 if (r < 0) {
4c633005 403 if (ret >= 0 && r != -ENOENT)
7027ff61 404 return r;
35d2e7ec 405
246aa6dd 406 return ret;
35d2e7ec 407 }
c6c18be3
LP
408
409 while ((r = cg_read_pid(f, &pid)) > 0) {
8c6db833 410
35d2e7ec
LP
411 /* This might do weird stuff if we aren't a
412 * single-threaded program. However, we
413 * luckily know we are not */
1d98fef1 414 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
c6c18be3 415 continue;
8c6db833 416
fea72cc0 417 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
35d2e7ec
LP
418 continue;
419
9b84c7f9
LP
420 /* Ignore kernel threads. Since they can only
421 * exist in the root cgroup, we only check for
422 * them there. */
423 if (cfrom &&
57ea45e1 424 empty_or_root(pfrom) &&
9b84c7f9
LP
425 is_kernel_thread(pid) > 0)
426 continue;
427
246aa6dd
LP
428 r = cg_attach(cto, pto, pid);
429 if (r < 0) {
4c633005 430 if (ret >= 0 && r != -ESRCH)
35d2e7ec
LP
431 ret = r;
432 } else if (ret == 0)
433 ret = 1;
8c6db833 434
8c6db833 435 done = false;
35d2e7ec 436
fea72cc0 437 r = set_put(s, PID_TO_PTR(pid));
246aa6dd 438 if (r < 0) {
35d2e7ec 439 if (ret >= 0)
7027ff61 440 return r;
35d2e7ec 441
246aa6dd 442 return ret;
35d2e7ec
LP
443 }
444 }
445
446 if (r < 0) {
447 if (ret >= 0)
7027ff61 448 return r;
35d2e7ec 449
246aa6dd 450 return ret;
8c6db833 451 }
35d2e7ec 452 } while (!done);
8c6db833 453
35d2e7ec 454 return ret;
8c6db833
LP
455}
456
4ad49000
LP
457int cg_migrate_recursive(
458 const char *cfrom,
459 const char *pfrom,
460 const char *cto,
461 const char *pto,
1d98fef1 462 CGroupFlags flags) {
4ad49000 463
246aa6dd 464 _cleanup_closedir_ DIR *d = NULL;
7027ff61 465 int r, ret = 0;
35d2e7ec 466 char *fn;
8c6db833 467
246aa6dd
LP
468 assert(cfrom);
469 assert(pfrom);
470 assert(cto);
471 assert(pto);
8c6db833 472
1d98fef1 473 ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
8c6db833 474
246aa6dd
LP
475 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
476 if (r < 0) {
4c633005 477 if (ret >= 0 && r != -ENOENT)
7027ff61
LP
478 return r;
479
246aa6dd 480 return ret;
35d2e7ec
LP
481 }
482
483 while ((r = cg_read_subgroup(d, &fn)) > 0) {
246aa6dd 484 _cleanup_free_ char *p = NULL;
8c6db833 485
605405c6 486 p = strjoin(pfrom, "/", fn);
35d2e7ec 487 free(fn);
e155a0aa
LP
488 if (!p)
489 return -ENOMEM;
8c6db833 490
1d98fef1 491 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
35d2e7ec
LP
492 if (r != 0 && ret >= 0)
493 ret = r;
8c6db833
LP
494 }
495
35d2e7ec
LP
496 if (r < 0 && ret >= 0)
497 ret = r;
498
1d98fef1 499 if (flags & CGROUP_REMOVE) {
4ad49000 500 r = cg_rmdir(cfrom, pfrom);
4c701096 501 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
246aa6dd
LP
502 return r;
503 }
8c6db833
LP
504
505 return ret;
506}
507
13b84ec7
LP
508int cg_migrate_recursive_fallback(
509 const char *cfrom,
510 const char *pfrom,
511 const char *cto,
512 const char *pto,
1d98fef1 513 CGroupFlags flags) {
13b84ec7
LP
514
515 int r;
516
517 assert(cfrom);
518 assert(pfrom);
519 assert(cto);
520 assert(pto);
521
1d98fef1 522 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
13b84ec7
LP
523 if (r < 0) {
524 char prefix[strlen(pto) + 1];
525
526 /* This didn't work? Then let's try all prefixes of the destination */
527
fecffe5d 528 PATH_FOREACH_PREFIX(prefix, pto) {
e155a0aa
LP
529 int q;
530
1d98fef1 531 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
e155a0aa
LP
532 if (q >= 0)
533 return q;
13b84ec7
LP
534 }
535 }
536
e155a0aa 537 return r;
13b84ec7
LP
538}
539
efdb0237
LP
540static const char *controller_to_dirname(const char *controller) {
541 const char *e;
3474ae3c 542
7027ff61
LP
543 assert(controller);
544
efdb0237
LP
545 /* Converts a controller name to the directory name below
546 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
547 * just cuts off the name= prefixed used for named
548 * hierarchies, if it is specified. */
549
2977724b 550 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
b4cccbc1 551 if (cg_hybrid_unified() > 0)
2977724b
TH
552 controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
553 else
554 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
555 }
b6629c4b 556
efdb0237
LP
557 e = startswith(controller, "name=");
558 if (e)
559 return e;
560
561 return controller;
3474ae3c
LP
562}
563
569b19d8
LP
564static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
565 const char *dn;
018ef268 566 char *t = NULL;
3474ae3c 567
efdb0237 568 assert(fs);
569b19d8
LP
569 assert(controller);
570
571 dn = controller_to_dirname(controller);
efdb0237
LP
572
573 if (isempty(path) && isempty(suffix))
569b19d8 574 t = strappend("/sys/fs/cgroup/", dn);
efdb0237 575 else if (isempty(path))
605405c6 576 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
efdb0237 577 else if (isempty(suffix))
605405c6 578 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
efdb0237 579 else
605405c6 580 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
efdb0237
LP
581 if (!t)
582 return -ENOMEM;
3474ae3c 583
efdb0237
LP
584 *fs = t;
585 return 0;
586}
587
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]". Empty or NULL 'path' and 'suffix' components are left out.
 * Stores the newly allocated string in *fs and returns 0, or returns -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        bool no_path, no_suffix;
        char *joined;

        assert(fs);

        no_path = isempty(path);
        no_suffix = isempty(suffix);

        if (no_path)
                joined = no_suffix ?
                        strdup("/sys/fs/cgroup") :
                        strappend("/sys/fs/cgroup/", suffix);
        else
                joined = no_suffix ?
                        strappend("/sys/fs/cgroup/", path) :
                        strjoin("/sys/fs/cgroup/", path, "/", suffix);
        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
607
/* Computes the file system path for a cgroup and, optionally, an attribute ('suffix') inside it.
 *
 * Without a controller the result is just 'path' and 'suffix' joined, i.e. the path *below* the
 * controllers without any prefix; at least one of the two has to be set then. With a controller the
 * result is located below /sys/fs/cgroup, laid out for the unified or the legacy hierarchy depending on
 * cg_all_unified().
 *
 * The result is passed through path_simplify() and stored, newly allocated, in *fs. Returns 0 on success,
 * -EINVAL for an invalid controller name or if nothing to join was given, -ENOMEM, or the error returned
 * by cg_all_unified(). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int r;

        assert(fs);

        if (!controller) {
                char *joined;

                if (!path && !suffix)
                        return -EINVAL;

                if (path && suffix)
                        joined = strjoin(path, "/", suffix);
                else
                        joined = strdup(path ? path : suffix);
                if (!joined)
                        return -ENOMEM;

                *fs = path_simplify(joined, false);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified();
        if (r < 0)
                return r;

        r = r > 0 ?
                join_path_unified(path, suffix, fs) :
                join_path_legacy(controller, path, suffix, fs);
        if (r < 0)
                return r;

        path_simplify(*fs, false);
        return 0;
}
dbd821ac 651
efdb0237 652static int controller_is_accessible(const char *controller) {
b4cccbc1 653 int r;
37099707 654
efdb0237 655 assert(controller);
37099707 656
efdb0237
LP
657 /* Checks whether a specific controller is accessible,
658 * i.e. its hierarchy mounted. In the unified hierarchy all
659 * controllers are considered accessible, except for the named
660 * hierarchies */
b12afc8c 661
efdb0237
LP
662 if (!cg_controller_is_valid(controller))
663 return -EINVAL;
664
b4cccbc1
LP
665 r = cg_all_unified();
666 if (r < 0)
667 return r;
668 if (r > 0) {
efdb0237
LP
669 /* We don't support named hierarchies if we are using
670 * the unified hierarchy. */
671
672 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
673 return 0;
674
675 if (startswith(controller, "name="))
676 return -EOPNOTSUPP;
677
678 } else {
679 const char *cc, *dn;
680
681 dn = controller_to_dirname(controller);
682 cc = strjoina("/sys/fs/cgroup/", dn);
683
684 if (laccess(cc, F_OK) < 0)
685 return -errno;
686 }
37099707
LP
687
688 return 0;
689}
690
/* Same as cg_get_path(), but first verifies via controller_is_accessible() that the specified controller
 * is actually accessible, and fails with that check's error if it is not. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
704
e27796a0 705static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
4ad49000
LP
706 assert(path);
707 assert(sb);
708 assert(ftwbuf);
e27796a0
LP
709
710 if (typeflag != FTW_DP)
711 return 0;
712
713 if (ftwbuf->level < 1)
714 return 0;
715
e155a0aa 716 (void) rmdir(path);
e27796a0
LP
717 return 0;
718}
719
8c6db833 720int cg_trim(const char *controller, const char *path, bool delete_root) {
7027ff61 721 _cleanup_free_ char *fs = NULL;
2977724b 722 int r = 0, q;
8c6db833 723
8c6db833
LP
724 assert(path);
725
e27796a0
LP
726 r = cg_get_path(controller, path, NULL, &fs);
727 if (r < 0)
8c6db833
LP
728 return r;
729
e27796a0 730 errno = 0;
e155a0aa
LP
731 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
732 if (errno == ENOENT)
733 r = 0;
b3267152 734 else if (errno > 0)
e155a0aa
LP
735 r = -errno;
736 else
737 r = -EIO;
738 }
e27796a0
LP
739
740 if (delete_root) {
4ad49000
LP
741 if (rmdir(fs) < 0 && errno != ENOENT)
742 return -errno;
e27796a0
LP
743 }
744
b4cccbc1
LP
745 q = cg_hybrid_unified();
746 if (q < 0)
747 return q;
748 if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
749 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
750 if (q < 0)
751 log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
752 }
753
e27796a0 754 return r;
8c6db833
LP
755}
756
65be7e06
ZJS
757/* Create a cgroup in the hierarchy of controller.
758 * Returns 0 if the group already existed, 1 on success, negative otherwise.
759 */
1434ae6f
LP
760int cg_create(const char *controller, const char *path) {
761 _cleanup_free_ char *fs = NULL;
762 int r;
763
764 r = cg_get_path_and_check(controller, path, NULL, &fs);
765 if (r < 0)
766 return r;
767
768 r = mkdir_parents(fs, 0755);
769 if (r < 0)
770 return r;
771
dae8b82e
ZJS
772 r = mkdir_errno_wrapper(fs, 0755);
773 if (r == -EEXIST)
774 return 0;
775 if (r < 0)
776 return r;
1434ae6f 777
b4cccbc1
LP
778 r = cg_hybrid_unified();
779 if (r < 0)
780 return r;
781
782 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
783 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
784 if (r < 0)
785 log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
786 }
787
1434ae6f
LP
788 return 1;
789}
790
/* Creates the cgroup with cg_create() and then attaches 'pid' to it with cg_attach().
 *
 * Returns the result of cg_create() (0 if the group already existed, 1 if it was created) when both steps
 * succeed, and the negative errno-style value of whichever step failed otherwise. The cgroup is not
 * removed again if attaching fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);
        if (attached < 0)
                return attached;

        return created;
}
807
8c6db833 808int cg_attach(const char *controller, const char *path, pid_t pid) {
574d5f2d
LP
809 _cleanup_free_ char *fs = NULL;
810 char c[DECIMAL_STR_MAX(pid_t) + 2];
8c6db833
LP
811 int r;
812
8c6db833
LP
813 assert(path);
814 assert(pid >= 0);
815
b043cd0b 816 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
3474ae3c 817 if (r < 0)
c6c18be3 818 return r;
8c6db833
LP
819
820 if (pid == 0)
df0ff127 821 pid = getpid_cached();
8c6db833 822
d054f0a4 823 xsprintf(c, PID_FMT "\n", pid);
8c6db833 824
604028de 825 r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
2977724b
TH
826 if (r < 0)
827 return r;
828
b4cccbc1
LP
829 r = cg_hybrid_unified();
830 if (r < 0)
831 return r;
832
833 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
834 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
835 if (r < 0)
bd68e99b 836 log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
2977724b
TH
837 }
838
839 return 0;
8c6db833
LP
840}
841
13b84ec7
LP
842int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
843 int r;
844
845 assert(controller);
846 assert(path);
847 assert(pid >= 0);
848
849 r = cg_attach(controller, path, pid);
850 if (r < 0) {
851 char prefix[strlen(path) + 1];
852
853 /* This didn't work? Then let's try all prefixes of
854 * the destination */
855
fecffe5d 856 PATH_FOREACH_PREFIX(prefix, path) {
e155a0aa
LP
857 int q;
858
859 q = cg_attach(controller, prefix, pid);
860 if (q >= 0)
861 return q;
13b84ec7
LP
862 }
863 }
864
e155a0aa 865 return r;
13b84ec7
LP
866}
867
62b9bb26 868int cg_set_access(
2d76d14e
LP
869 const char *controller,
870 const char *path,
2d76d14e
LP
871 uid_t uid,
872 gid_t gid) {
873
62b9bb26
LP
874 struct Attribute {
875 const char *name;
876 bool fatal;
877 };
878
4e1dfa45 879 /* cgroup v1, aka legacy/non-unified */
62b9bb26
LP
880 static const struct Attribute legacy_attributes[] = {
881 { "cgroup.procs", true },
882 { "tasks", false },
883 { "cgroup.clone_children", false },
884 {},
885 };
886
4e1dfa45 887 /* cgroup v2, aka unified */
62b9bb26
LP
888 static const struct Attribute unified_attributes[] = {
889 { "cgroup.procs", true },
890 { "cgroup.subtree_control", true },
891 { "cgroup.threads", false },
892 {},
893 };
894
895 static const struct Attribute* const attributes[] = {
896 [false] = legacy_attributes,
897 [true] = unified_attributes,
898 };
974efc46 899
40853aa5 900 _cleanup_free_ char *fs = NULL;
62b9bb26
LP
901 const struct Attribute *i;
902 int r, unified;
8c6db833 903
8c6db833
LP
904 assert(path);
905
62b9bb26 906 if (uid == UID_INVALID && gid == GID_INVALID)
8d53b453
LP
907 return 0;
908
62b9bb26
LP
909 unified = cg_unified_controller(controller);
910 if (unified < 0)
911 return unified;
8c6db833 912
62b9bb26
LP
913 /* Configure access to the cgroup itself */
914 r = cg_get_path(controller, path, NULL, &fs);
974efc46
LP
915 if (r < 0)
916 return r;
8c6db833 917
62b9bb26 918 r = chmod_and_chown(fs, 0755, uid, gid);
b4cccbc1
LP
919 if (r < 0)
920 return r;
40853aa5 921
62b9bb26
LP
922 /* Configure access to the cgroup's attributes */
923 for (i = attributes[unified]; i->name; i++) {
40853aa5 924 fs = mfree(fs);
40853aa5 925
62b9bb26 926 r = cg_get_path(controller, path, i->name, &fs);
40853aa5
LP
927 if (r < 0)
928 return r;
efdb0237 929
62b9bb26
LP
930 r = chmod_and_chown(fs, 0644, uid, gid);
931 if (r < 0) {
932 if (i->fatal)
933 return r;
5beac75e 934
62b9bb26
LP
935 log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
936 }
937 }
938
939 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
940 r = cg_hybrid_unified();
2977724b 941 if (r < 0)
62b9bb26
LP
942 return r;
943 if (r > 0) {
944 /* Always propagate access mode from unified to legacy controller */
945 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
946 if (r < 0)
947 log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
948 }
2977724b 949 }
974efc46 950
efdb0237 951 return 0;
8c6db833
LP
952}
953
4b58153d
LP
954int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
955 _cleanup_free_ char *fs = NULL;
956 int r;
957
958 assert(path);
959 assert(name);
960 assert(value || size <= 0);
961
962 r = cg_get_path(controller, path, NULL, &fs);
963 if (r < 0)
964 return r;
965
966 if (setxattr(fs, name, value, size, flags) < 0)
967 return -errno;
968
969 return 0;
970}
971
972int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
973 _cleanup_free_ char *fs = NULL;
974 ssize_t n;
975 int r;
976
977 assert(path);
978 assert(name);
979
980 r = cg_get_path(controller, path, NULL, &fs);
981 if (r < 0)
982 return r;
983
984 n = getxattr(fs, name, value, size);
985 if (n < 0)
986 return -errno;
987
988 return (int) n;
989}
990
7027ff61 991int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
7027ff61 992 _cleanup_fclose_ FILE *f = NULL;
b6629c4b 993 const char *fs, *controller_str;
d2b39cb6 994 int unified, r;
efdb0237 995 size_t cs = 0;
8c6db833 996
8c6db833 997 assert(path);
c6c18be3 998 assert(pid >= 0);
8c6db833 999
5da38d07
TH
1000 if (controller) {
1001 if (!cg_controller_is_valid(controller))
1002 return -EINVAL;
1003 } else
1004 controller = SYSTEMD_CGROUP_CONTROLLER;
1005
c22800e4 1006 unified = cg_unified_controller(controller);
b4cccbc1
LP
1007 if (unified < 0)
1008 return unified;
1009 if (unified == 0) {
b6629c4b
TH
1010 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
1011 controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
1012 else
1013 controller_str = controller;
1014
1015 cs = strlen(controller_str);
1016 }
7027ff61 1017
b68fa010 1018 fs = procfs_file_alloca(pid, "cgroup");
c6c18be3 1019 f = fopen(fs, "re");
4c633005
LP
1020 if (!f)
1021 return errno == ENOENT ? -ESRCH : -errno;
1022
35bbbf85
LP
1023 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
1024
d2b39cb6
LP
1025 for (;;) {
1026 _cleanup_free_ char *line = NULL;
efdb0237 1027 char *e, *p;
c6c18be3 1028
d2b39cb6
LP
1029 r = read_line(f, LONG_LINE_MAX, &line);
1030 if (r < 0)
1031 return r;
1032 if (r == 0)
1033 break;
c6c18be3 1034
efdb0237
LP
1035 if (unified) {
1036 e = startswith(line, "0:");
1037 if (!e)
1038 continue;
c6c18be3 1039
efdb0237
LP
1040 e = strchr(e, ':');
1041 if (!e)
1042 continue;
1043 } else {
1044 char *l;
1045 size_t k;
1046 const char *word, *state;
1047 bool found = false;
1048
1049 l = strchr(line, ':');
1050 if (!l)
1051 continue;
8af8afd6 1052
efdb0237
LP
1053 l++;
1054 e = strchr(l, ':');
1055 if (!e)
1056 continue;
8af8afd6 1057
efdb0237 1058 *e = 0;
00d4b1e6 1059 FOREACH_WORD_SEPARATOR(word, k, l, ",", state)
b6629c4b 1060 if (k == cs && memcmp(word, controller_str, cs) == 0) {
efdb0237
LP
1061 found = true;
1062 break;
1063 }
efdb0237
LP
1064 if (!found)
1065 continue;
8af8afd6
LP
1066 }
1067
8af8afd6 1068 p = strdup(e + 1);
7027ff61
LP
1069 if (!p)
1070 return -ENOMEM;
c6c18be3 1071
5e20b0a4
LP
1072 /* Truncate suffix indicating the process is a zombie */
1073 e = endswith(p, " (deleted)");
1074 if (e)
1075 *e = 0;
1076
c6c18be3 1077 *path = p;
7027ff61 1078 return 0;
c6c18be3
LP
1079 }
1080
1c80e425 1081 return -ENODATA;
8c6db833
LP
1082}
1083
1084int cg_install_release_agent(const char *controller, const char *agent) {
7027ff61 1085 _cleanup_free_ char *fs = NULL, *contents = NULL;
efdb0237 1086 const char *sc;
415fc41c 1087 int r;
8c6db833 1088
8c6db833
LP
1089 assert(agent);
1090
c22800e4 1091 r = cg_unified_controller(controller);
b4cccbc1
LP
1092 if (r < 0)
1093 return r;
1094 if (r > 0) /* doesn't apply to unified hierarchy */
efdb0237
LP
1095 return -EOPNOTSUPP;
1096
7027ff61
LP
1097 r = cg_get_path(controller, NULL, "release_agent", &fs);
1098 if (r < 0)
c6c18be3 1099 return r;
8c6db833 1100
7027ff61
LP
1101 r = read_one_line_file(fs, &contents);
1102 if (r < 0)
1103 return r;
8c6db833
LP
1104
1105 sc = strstrip(contents);
e155a0aa 1106 if (isempty(sc)) {
604028de 1107 r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
574d5f2d 1108 if (r < 0)
7027ff61 1109 return r;
b8725df8 1110 } else if (!path_equal(sc, agent))
7027ff61 1111 return -EEXIST;
8c6db833 1112
0da16248 1113 fs = mfree(fs);
7027ff61
LP
1114 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1115 if (r < 0)
1116 return r;
8c6db833 1117
0da16248 1118 contents = mfree(contents);
7027ff61
LP
1119 r = read_one_line_file(fs, &contents);
1120 if (r < 0)
1121 return r;
8c6db833
LP
1122
1123 sc = strstrip(contents);
8c6db833 1124 if (streq(sc, "0")) {
604028de 1125 r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
7027ff61
LP
1126 if (r < 0)
1127 return r;
c6c18be3 1128
7027ff61
LP
1129 return 1;
1130 }
8c6db833 1131
7027ff61
LP
1132 if (!streq(sc, "1"))
1133 return -EIO;
8c6db833 1134
7027ff61 1135 return 0;
8c6db833
LP
1136}
1137
ad929bcc
KS
1138int cg_uninstall_release_agent(const char *controller) {
1139 _cleanup_free_ char *fs = NULL;
415fc41c 1140 int r;
efdb0237 1141
c22800e4 1142 r = cg_unified_controller(controller);
b4cccbc1
LP
1143 if (r < 0)
1144 return r;
1145 if (r > 0) /* Doesn't apply to unified hierarchy */
efdb0237 1146 return -EOPNOTSUPP;
ad929bcc 1147
ac9ef333
LP
1148 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1149 if (r < 0)
1150 return r;
1151
604028de 1152 r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
ac9ef333
LP
1153 if (r < 0)
1154 return r;
1155
0da16248 1156 fs = mfree(fs);
ac9ef333 1157
ad929bcc
KS
1158 r = cg_get_path(controller, NULL, "release_agent", &fs);
1159 if (r < 0)
1160 return r;
1161
604028de 1162 r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER);
ad929bcc
KS
1163 if (r < 0)
1164 return r;
1165
ac9ef333 1166 return 0;
ad929bcc
KS
1167}
1168
6f883237 1169int cg_is_empty(const char *controller, const char *path) {
7027ff61 1170 _cleanup_fclose_ FILE *f = NULL;
efdb0237 1171 pid_t pid;
7027ff61 1172 int r;
8c6db833 1173
8c6db833
LP
1174 assert(path);
1175
b043cd0b 1176 r = cg_enumerate_processes(controller, path, &f);
6f883237 1177 if (r == -ENOENT)
1bcf3fc6 1178 return true;
c3175a7f 1179 if (r < 0)
6f883237 1180 return r;
8c6db833 1181
6f883237 1182 r = cg_read_pid(f, &pid);
c6c18be3
LP
1183 if (r < 0)
1184 return r;
8c6db833 1185
6f883237 1186 return r == 0;
8c6db833
LP
1187}
1188
6f883237 1189int cg_is_empty_recursive(const char *controller, const char *path) {
415fc41c 1190 int r;
8c6db833 1191
8c6db833
LP
1192 assert(path);
1193
6fd66507 1194 /* The root cgroup is always populated */
57ea45e1 1195 if (controller && empty_or_root(path))
efdb0237 1196 return false;
6fd66507 1197
c22800e4 1198 r = cg_unified_controller(controller);
b4cccbc1
LP
1199 if (r < 0)
1200 return r;
1201 if (r > 0) {
ab2c3861 1202 _cleanup_free_ char *t = NULL;
8c6db833 1203
efdb0237 1204 /* On the unified hierarchy we can check empty state
ab2c3861 1205 * via the "populated" attribute of "cgroup.events". */
8c6db833 1206
ab2c3861 1207 r = cg_read_event(controller, path, "populated", &t);
1bcf3fc6
ZJS
1208 if (r == -ENOENT)
1209 return true;
efdb0237
LP
1210 if (r < 0)
1211 return r;
1212
1213 return streq(t, "0");
1214 } else {
1215 _cleanup_closedir_ DIR *d = NULL;
1216 char *fn;
8c6db833 1217
efdb0237 1218 r = cg_is_empty(controller, path);
35d2e7ec 1219 if (r <= 0)
7027ff61 1220 return r;
35d2e7ec 1221
efdb0237
LP
1222 r = cg_enumerate_subgroups(controller, path, &d);
1223 if (r == -ENOENT)
1bcf3fc6 1224 return true;
efdb0237
LP
1225 if (r < 0)
1226 return r;
35d2e7ec 1227
efdb0237
LP
1228 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1229 _cleanup_free_ char *p = NULL;
1230
605405c6 1231 p = strjoin(path, "/", fn);
efdb0237
LP
1232 free(fn);
1233 if (!p)
1234 return -ENOMEM;
1235
1236 r = cg_is_empty_recursive(controller, p);
1237 if (r <= 0)
1238 return r;
1239 }
1240 if (r < 0)
1241 return r;
1242
1243 return true;
1244 }
35d2e7ec
LP
1245}
1246
1247int cg_split_spec(const char *spec, char **controller, char **path) {
35d2e7ec 1248 char *t = NULL, *u = NULL;
efdb0237 1249 const char *e;
35d2e7ec
LP
1250
1251 assert(spec);
35d2e7ec
LP
1252
1253 if (*spec == '/') {
99be45a4 1254 if (!path_is_normalized(spec))
e884315e 1255 return -EINVAL;
35d2e7ec
LP
1256
1257 if (path) {
246aa6dd
LP
1258 t = strdup(spec);
1259 if (!t)
35d2e7ec
LP
1260 return -ENOMEM;
1261
858d36c1 1262 *path = path_simplify(t, false);
8c6db833
LP
1263 }
1264
35d2e7ec
LP
1265 if (controller)
1266 *controller = NULL;
1267
1268 return 0;
8c6db833
LP
1269 }
1270
246aa6dd
LP
1271 e = strchr(spec, ':');
1272 if (!e) {
185a0874 1273 if (!cg_controller_is_valid(spec))
35d2e7ec
LP
1274 return -EINVAL;
1275
1276 if (controller) {
efdb0237 1277 t = strdup(spec);
246aa6dd 1278 if (!t)
35d2e7ec
LP
1279 return -ENOMEM;
1280
1281 *controller = t;
1282 }
1283
1284 if (path)
1285 *path = NULL;
1286
1287 return 0;
8c6db833
LP
1288 }
1289
efdb0237 1290 t = strndup(spec, e-spec);
e884315e
LP
1291 if (!t)
1292 return -ENOMEM;
185a0874 1293 if (!cg_controller_is_valid(t)) {
e884315e 1294 free(t);
35d2e7ec 1295 return -EINVAL;
246aa6dd
LP
1296 }
1297
efdb0237
LP
1298 if (isempty(e+1))
1299 u = NULL;
1300 else {
baa89da4
LP
1301 u = strdup(e+1);
1302 if (!u) {
1303 free(t);
1304 return -ENOMEM;
1305 }
35d2e7ec 1306
99be45a4 1307 if (!path_is_normalized(u) ||
baa89da4
LP
1308 !path_is_absolute(u)) {
1309 free(t);
1310 free(u);
1311 return -EINVAL;
1312 }
1313
858d36c1 1314 path_simplify(u, false);
baa89da4 1315 }
5954c074 1316
35d2e7ec
LP
1317 if (controller)
1318 *controller = t;
e884315e
LP
1319 else
1320 free(t);
35d2e7ec
LP
1321
1322 if (path)
1323 *path = u;
e884315e
LP
1324 else
1325 free(u);
35d2e7ec
LP
1326
1327 return 0;
8c6db833 1328}
c6c18be3 1329
7027ff61 1330int cg_mangle_path(const char *path, char **result) {
78edb35a
LP
1331 _cleanup_free_ char *c = NULL, *p = NULL;
1332 char *t;
35d2e7ec
LP
1333 int r;
1334
1335 assert(path);
1336 assert(result);
1337
73e231ab 1338 /* First, check if it already is a filesystem path */
7027ff61 1339 if (path_startswith(path, "/sys/fs/cgroup")) {
35d2e7ec 1340
b69d29ce
LP
1341 t = strdup(path);
1342 if (!t)
35d2e7ec
LP
1343 return -ENOMEM;
1344
858d36c1 1345 *result = path_simplify(t, false);
35d2e7ec
LP
1346 return 0;
1347 }
1348
73e231ab 1349 /* Otherwise, treat it as cg spec */
b69d29ce
LP
1350 r = cg_split_spec(path, &c, &p);
1351 if (r < 0)
35d2e7ec
LP
1352 return r;
1353
efdb0237 1354 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
35d2e7ec 1355}
1f73f0f1 1356
7027ff61 1357int cg_get_root_path(char **path) {
9444b1f2 1358 char *p, *e;
7027ff61
LP
1359 int r;
1360
1361 assert(path);
1362
9444b1f2 1363 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
7027ff61
LP
1364 if (r < 0)
1365 return r;
1366
efdb0237
LP
1367 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1368 if (!e)
1369 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1370 if (!e)
1371 e = endswith(p, "/system"); /* even more legacy */
9444b1f2 1372 if (e)
7027ff61
LP
1373 *e = 0;
1374
1f73f0f1
LP
1375 *path = p;
1376 return 0;
1377}
b59e2465 1378
751bc6ac
LP
1379int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1380 _cleanup_free_ char *rt = NULL;
1381 char *p;
ba1261bc
LP
1382 int r;
1383
e9174f29 1384 assert(cgroup);
751bc6ac 1385 assert(shifted);
e9174f29
LP
1386
1387 if (!root) {
1388 /* If the root was specified let's use that, otherwise
1389 * let's determine it from PID 1 */
1390
751bc6ac 1391 r = cg_get_root_path(&rt);
e9174f29
LP
1392 if (r < 0)
1393 return r;
1394
751bc6ac 1395 root = rt;
e9174f29 1396 }
ba1261bc 1397
751bc6ac 1398 p = path_startswith(cgroup, root);
efdb0237 1399 if (p && p > cgroup)
751bc6ac
LP
1400 *shifted = p - 1;
1401 else
1402 *shifted = cgroup;
1403
1404 return 0;
1405}
1406
1407int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1408 _cleanup_free_ char *raw = NULL;
1409 const char *c;
1410 int r;
1411
1412 assert(pid >= 0);
1413 assert(cgroup);
1414
1415 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
7027ff61 1416 if (r < 0)
ba1261bc 1417 return r;
ba1261bc 1418
751bc6ac
LP
1419 r = cg_shift_path(raw, root, &c);
1420 if (r < 0)
1421 return r;
ba1261bc 1422
ae2a15bc
LP
1423 if (c == raw)
1424 *cgroup = TAKE_PTR(raw);
1425 else {
751bc6ac 1426 char *n;
ba1261bc 1427
751bc6ac
LP
1428 n = strdup(c);
1429 if (!n)
ba1261bc 1430 return -ENOMEM;
ba1261bc 1431
751bc6ac
LP
1432 *cgroup = n;
1433 }
ba1261bc
LP
1434
1435 return 0;
1436}
1437
9ed794a3 1438int cg_path_decode_unit(const char *cgroup, char **unit) {
8b0849e9
LP
1439 char *c, *s;
1440 size_t n;
ef1673d1
MT
1441
1442 assert(cgroup);
6c03089c 1443 assert(unit);
ef1673d1 1444
8b0849e9
LP
1445 n = strcspn(cgroup, "/");
1446 if (n < 3)
1447 return -ENXIO;
1448
1449 c = strndupa(cgroup, n);
ae018d9b 1450 c = cg_unescape(c);
ef1673d1 1451
7410616c 1452 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
cfeaa44a 1453 return -ENXIO;
ef1673d1 1454
d7bd3de0 1455 s = strdup(c);
6c03089c
LP
1456 if (!s)
1457 return -ENOMEM;
1458
1459 *unit = s;
ef1673d1
MT
1460 return 0;
1461}
1462
8b0849e9
LP
1463static bool valid_slice_name(const char *p, size_t n) {
1464
1465 if (!p)
1466 return false;
1467
fbd0b64f 1468 if (n < STRLEN("x.slice"))
8b0849e9
LP
1469 return false;
1470
1471 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1472 char buf[n+1], *c;
1473
1474 memcpy(buf, p, n);
1475 buf[n] = 0;
1476
1477 c = cg_unescape(buf);
1478
7410616c 1479 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
8b0849e9
LP
1480 }
1481
1482 return false;
1483}
1484
/* Skips over all leading path components that are valid slice names, and returns a pointer to
 * the first component that is not (with any slashes in front of it skipped, too). */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t len;

                component = p + strspn(p, "/");
                len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                /* A slice, continue right after it */
                p = component + len;
        }
}
1502
8b0849e9 1503int cg_path_get_unit(const char *path, char **ret) {
6c03089c 1504 const char *e;
8b0849e9
LP
1505 char *unit;
1506 int r;
6c03089c
LP
1507
1508 assert(path);
8b0849e9 1509 assert(ret);
6c03089c 1510
9444b1f2 1511 e = skip_slices(path);
6c03089c 1512
8b0849e9
LP
1513 r = cg_path_decode_unit(e, &unit);
1514 if (r < 0)
1515 return r;
1516
1517 /* We skipped over the slices, don't accept any now */
1518 if (endswith(unit, ".slice")) {
1519 free(unit);
1520 return -ENXIO;
1521 }
1522
1523 *ret = unit;
1524 return 0;
6c03089c
LP
1525}
1526
1527int cg_pid_get_unit(pid_t pid, char **unit) {
7fd1b19b 1528 _cleanup_free_ char *cgroup = NULL;
ba1261bc 1529 int r;
ba1261bc 1530
ef1673d1
MT
1531 assert(unit);
1532
7027ff61 1533 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
ef1673d1
MT
1534 if (r < 0)
1535 return r;
1536
6c03089c
LP
1537 return cg_path_get_unit(cgroup, unit);
1538}
ef1673d1 1539
d4fffc4b
ZJS
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to whatever follows the "session-<id>.scope" component (and the slashes
 * after it), or NULL if the path is empty, the first component does not have that form, or the
 * embedded session ID is not valid.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the session ID between prefix and suffix, plus the trailing NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure with NULL rather than
                 * with a boolean. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1576
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the "user@<uid>.service" component (slashes after it are
 * skipped as well), or NULL if the path is empty, its first component does not look like that,
 * or the part between "user@" and ".service" does not parse as UID.
 */
static const char *skip_user_manager(const char *p) {
        size_t len, uid_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < STRLEN("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;

        uid_len = len - 5 - 8;

        {
                char uid[uid_len + 1];

                memcpy(uid, p + 5, uid_len);
                uid[uid_len] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(uid, NULL) < 0)
                        return NULL;
        }

        p += len;
        return p + strspn(p, "/");
}
1614
/* Skips the part of a cgroup path that leads into a user context: any leading slices, followed
 * by either a user manager service or, failing that, a session scope. Returns NULL if neither of
 * the two is found after the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *rest, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        rest = skip_slices(path);

        /* Skip the user manager, if it's in the path now, and
         * alternatively skip the user session if it is in the path. */
        after_manager = skip_user_manager(rest);

        return after_manager ?: skip_session(rest);
}
32081481 1631
329ac4bc
LP
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO if the path carries no user
 * prefix (see skip_user_prefix()), otherwise the result of cg_path_get_unit() on the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *inner;

        assert(path);
        assert(ret);

        inner = skip_user_prefix(path);
        if (!inner)
                return -ENXIO;

        /* Beyond the prefix a user unit is laid out like a system unit,
         * hence the same parser can be used. */
        return cg_path_get_unit(inner, ret);
}
ba1261bc 1647
ef1673d1 1648int cg_pid_get_user_unit(pid_t pid, char **unit) {
7fd1b19b 1649 _cleanup_free_ char *cgroup = NULL;
6c03089c
LP
1650 int r;
1651
1652 assert(unit);
1653
7027ff61 1654 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
6c03089c
LP
1655 if (r < 0)
1656 return r;
1657
1658 return cg_path_get_user_unit(cgroup, unit);
ba1261bc 1659}
e884315e 1660
7027ff61 1661int cg_path_get_machine_name(const char *path, char **machine) {
efdb0237
LP
1662 _cleanup_free_ char *u = NULL;
1663 const char *sl;
89f7c846 1664 int r;
374ec6ab 1665
89f7c846
LP
1666 r = cg_path_get_unit(path, &u);
1667 if (r < 0)
1668 return r;
7027ff61 1669
efdb0237 1670 sl = strjoina("/run/systemd/machines/unit:", u);
89f7c846 1671 return readlink_malloc(sl, machine);
7027ff61
LP
1672}
1673
1674int cg_pid_get_machine_name(pid_t pid, char **machine) {
7fd1b19b 1675 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1676 int r;
1677
1678 assert(machine);
1679
1680 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1681 if (r < 0)
1682 return r;
1683
1684 return cg_path_get_machine_name(cgroup, machine);
1685}
1686
1687int cg_path_get_session(const char *path, char **session) {
8b0849e9
LP
1688 _cleanup_free_ char *unit = NULL;
1689 char *start, *end;
1690 int r;
7027ff61
LP
1691
1692 assert(path);
7027ff61 1693
8b0849e9
LP
1694 r = cg_path_get_unit(path, &unit);
1695 if (r < 0)
1696 return r;
7027ff61 1697
8b0849e9
LP
1698 start = startswith(unit, "session-");
1699 if (!start)
cfeaa44a 1700 return -ENXIO;
8b0849e9
LP
1701 end = endswith(start, ".scope");
1702 if (!end)
cfeaa44a 1703 return -ENXIO;
8b0849e9
LP
1704
1705 *end = 0;
1706 if (!session_id_valid(start))
cfeaa44a 1707 return -ENXIO;
374ec6ab 1708
af08d2f9 1709 if (session) {
8b0849e9 1710 char *rr;
af08d2f9 1711
8b0849e9
LP
1712 rr = strdup(start);
1713 if (!rr)
af08d2f9
LP
1714 return -ENOMEM;
1715
8b0849e9 1716 *session = rr;
af08d2f9 1717 }
7027ff61 1718
7027ff61
LP
1719 return 0;
1720}
1721
1722int cg_pid_get_session(pid_t pid, char **session) {
7fd1b19b 1723 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1724 int r;
1725
7027ff61
LP
1726 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1727 if (r < 0)
1728 return r;
1729
1730 return cg_path_get_session(cgroup, session);
1731}
1732
ae018d9b 1733int cg_path_get_owner_uid(const char *path, uid_t *uid) {
374ec6ab 1734 _cleanup_free_ char *slice = NULL;
8b0849e9 1735 char *start, *end;
374ec6ab 1736 int r;
ae018d9b
LP
1737
1738 assert(path);
ae018d9b 1739
374ec6ab
LP
1740 r = cg_path_get_slice(path, &slice);
1741 if (r < 0)
1742 return r;
ae018d9b 1743
674eb685
LP
1744 start = startswith(slice, "user-");
1745 if (!start)
cfeaa44a 1746 return -ENXIO;
8b0849e9 1747 end = endswith(start, ".slice");
674eb685 1748 if (!end)
cfeaa44a 1749 return -ENXIO;
ae018d9b 1750
8b0849e9
LP
1751 *end = 0;
1752 if (parse_uid(start, uid) < 0)
cfeaa44a 1753 return -ENXIO;
674eb685 1754
674eb685 1755 return 0;
ae018d9b
LP
1756}
1757
1758int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1759 _cleanup_free_ char *cgroup = NULL;
1760 int r;
1761
ae018d9b
LP
1762 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1763 if (r < 0)
1764 return r;
1765
1766 return cg_path_get_owner_uid(cgroup, uid);
1767}
1768
1021b21b
LP
1769int cg_path_get_slice(const char *p, char **slice) {
1770 const char *e = NULL;
1021b21b
LP
1771
1772 assert(p);
1773 assert(slice);
1774
329ac4bc
LP
1775 /* Finds the right-most slice unit from the beginning, but
1776 * stops before we come to the first non-slice unit. */
1777
1021b21b
LP
1778 for (;;) {
1779 size_t n;
1780
1781 p += strspn(p, "/");
1782
1783 n = strcspn(p, "/");
8b0849e9 1784 if (!valid_slice_name(p, n)) {
1021b21b 1785
8b0849e9
LP
1786 if (!e) {
1787 char *s;
1021b21b 1788
e5d855d3 1789 s = strdup(SPECIAL_ROOT_SLICE);
8b0849e9
LP
1790 if (!s)
1791 return -ENOMEM;
1021b21b 1792
8b0849e9
LP
1793 *slice = s;
1794 return 0;
1795 }
1796
1797 return cg_path_decode_unit(e, slice);
1021b21b
LP
1798 }
1799
1800 e = p;
1021b21b
LP
1801 p += n;
1802 }
1803}
1804
1805int cg_pid_get_slice(pid_t pid, char **slice) {
1806 _cleanup_free_ char *cgroup = NULL;
1807 int r;
1808
1809 assert(slice);
1810
1811 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1812 if (r < 0)
1813 return r;
1814
1815 return cg_path_get_slice(cgroup, slice);
1816}
1817
329ac4bc
LP
/* Determines the user slice of a cgroup path. Returns -ENXIO if the path carries no user prefix
 * (see skip_user_prefix()), otherwise the result of cg_path_get_slice() on the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *inner;

        assert(p);
        assert(slice);

        inner = skip_user_prefix(p);
        if (!inner)
                return -ENXIO;

        /* Beyond the prefix things are laid out like for a system
         * slice, hence the same parser can be used. */
        return cg_path_get_slice(inner, slice);
}
1831
1832int cg_pid_get_user_slice(pid_t pid, char **slice) {
1833 _cleanup_free_ char *cgroup = NULL;
1834 int r;
1835
1836 assert(slice);
1837
1838 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1839 if (r < 0)
1840 return r;
1841
1842 return cg_path_get_user_slice(cgroup, slice);
1843}
1844
ae018d9b
LP
1845char *cg_escape(const char *p) {
1846 bool need_prefix = false;
1847
1848 /* This implements very minimal escaping for names to be used
1849 * as file names in the cgroup tree: any name which might
1850 * conflict with a kernel name or is prefixed with '_' is
1851 * prefixed with a '_'. That way, when reading cgroup names it
1852 * is sufficient to remove a single prefixing underscore if
1853 * there is one. */
1854
1855 /* The return value of this function (unlike cg_unescape())
1856 * needs free()! */
1857
4c701096 1858 if (IN_SET(p[0], 0, '_', '.') ||
0cbd293e 1859 STR_IN_SET(p, "notify_on_release", "release_agent", "tasks") ||
efdb0237 1860 startswith(p, "cgroup."))
ae018d9b
LP
1861 need_prefix = true;
1862 else {
1863 const char *dot;
1864
1865 dot = strrchr(p, '.');
1866 if (dot) {
efdb0237
LP
1867 CGroupController c;
1868 size_t l = dot - p;
ae018d9b 1869
efdb0237
LP
1870 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1871 const char *n;
1872
1873 n = cgroup_controller_to_string(c);
ae018d9b 1874
efdb0237
LP
1875 if (l != strlen(n))
1876 continue;
ae018d9b 1877
efdb0237
LP
1878 if (memcmp(p, n, l) != 0)
1879 continue;
1880
1881 need_prefix = true;
1882 break;
ae018d9b
LP
1883 }
1884 }
1885 }
1886
1887 if (need_prefix)
1888 return strappend("_", p);
efdb0237
LP
1889
1890 return strdup(p);
ae018d9b
LP
1891}
1892
/* Reverses cg_escape(): skips a single leading underscore, if there is one.
 *
 * The return value of this function (unlike cg_escape()) doesn't need free()! It points into
 * the string passed in. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
78edb35a
LP
1904
1905#define CONTROLLER_VALID \
4b549144 1906 DIGITS LETTERS \
78edb35a
LP
1907 "_"
1908
185a0874 1909bool cg_controller_is_valid(const char *p) {
78edb35a
LP
1910 const char *t, *s;
1911
1912 if (!p)
1913 return false;
1914
b6629c4b
TH
1915 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1916 return true;
1917
185a0874
DJL
1918 s = startswith(p, "name=");
1919 if (s)
1920 p = s;
78edb35a 1921
4c701096 1922 if (IN_SET(*p, 0, '_'))
78edb35a
LP
1923 return false;
1924
1925 for (t = p; *t; t++)
1926 if (!strchr(CONTROLLER_VALID, *t))
1927 return false;
1928
1929 if (t - p > FILENAME_MAX)
1930 return false;
1931
1932 return true;
1933}
a016b922
LP
1934
1935int cg_slice_to_path(const char *unit, char **ret) {
1936 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1937 const char *dash;
7410616c 1938 int r;
a016b922
LP
1939
1940 assert(unit);
1941 assert(ret);
1942
e5d855d3 1943 if (streq(unit, SPECIAL_ROOT_SLICE)) {
c96cc582
LP
1944 char *x;
1945
1946 x = strdup("");
1947 if (!x)
1948 return -ENOMEM;
1949 *ret = x;
1950 return 0;
1951 }
1952
7410616c 1953 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
a016b922
LP
1954 return -EINVAL;
1955
1956 if (!endswith(unit, ".slice"))
1957 return -EINVAL;
1958
7410616c
LP
1959 r = unit_name_to_prefix(unit, &p);
1960 if (r < 0)
1961 return r;
a016b922
LP
1962
1963 dash = strchr(p, '-');
e66e5b61
LP
1964
1965 /* Don't allow initial dashes */
1966 if (dash == p)
1967 return -EINVAL;
1968
a016b922
LP
1969 while (dash) {
1970 _cleanup_free_ char *escaped = NULL;
1971 char n[dash - p + sizeof(".slice")];
1972
989290db 1973#if HAS_FEATURE_MEMORY_SANITIZER
1c56d501
ZJS
1974 /* msan doesn't instrument stpncpy, so it thinks
1975 * n is later used unitialized:
1976 * https://github.com/google/sanitizers/issues/926
1977 */
1978 zero(n);
1979#endif
1980
e66e5b61 1981 /* Don't allow trailing or double dashes */
4c701096 1982 if (IN_SET(dash[1], 0, '-'))
c96cc582 1983 return -EINVAL;
a016b922 1984
c96cc582 1985 strcpy(stpncpy(n, p, dash - p), ".slice");
7410616c 1986 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
a016b922
LP
1987 return -EINVAL;
1988
1989 escaped = cg_escape(n);
1990 if (!escaped)
1991 return -ENOMEM;
1992
1993 if (!strextend(&s, escaped, "/", NULL))
1994 return -ENOMEM;
1995
1996 dash = strchr(dash+1, '-');
1997 }
1998
1999 e = cg_escape(unit);
2000 if (!e)
2001 return -ENOMEM;
2002
2003 if (!strextend(&s, e, NULL))
2004 return -ENOMEM;
2005
ae2a15bc 2006 *ret = TAKE_PTR(s);
a016b922
LP
2007
2008 return 0;
2009}
4ad49000
LP
2010
2011int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2012 _cleanup_free_ char *p = NULL;
2013 int r;
2014
2015 r = cg_get_path(controller, path, attribute, &p);
2016 if (r < 0)
2017 return r;
2018
604028de 2019 return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
4ad49000
LP
2020}
2021
934277fe
LP
2022int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2023 _cleanup_free_ char *p = NULL;
2024 int r;
2025
2026 r = cg_get_path(controller, path, attribute, &p);
2027 if (r < 0)
2028 return r;
2029
2030 return read_one_line_file(p, ret);
2031}
2032
b734a4ff
LP
2033int cg_get_keyed_attribute(
2034 const char *controller,
2035 const char *path,
2036 const char *attribute,
2037 char **keys,
2038 char **ret_values) {
66ebf6c0 2039
b734a4ff 2040 _cleanup_free_ char *filename = NULL, *contents = NULL;
b734a4ff 2041 const char *p;
9177fa9f 2042 size_t n, i, n_done = 0;
b734a4ff
LP
2043 char **v;
2044 int r;
2045
4e1dfa45 2046 /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
b734a4ff
LP
2047 * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
2048 * entries as 'keys'. On success each entry will be set to the value of the matching key.
2049 *
2050 * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
66ebf6c0
TH
2051
2052 r = cg_get_path(controller, path, attribute, &filename);
2053 if (r < 0)
2054 return r;
2055
b734a4ff 2056 r = read_full_file(filename, &contents, NULL);
66ebf6c0
TH
2057 if (r < 0)
2058 return r;
2059
b734a4ff
LP
2060 n = strv_length(keys);
2061 if (n == 0) /* No keys to retrieve? That's easy, we are done then */
2062 return 0;
66ebf6c0 2063
b734a4ff
LP
2064 /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
2065 v = newa0(char*, n);
66ebf6c0 2066
b734a4ff
LP
2067 for (p = contents; *p;) {
2068 const char *w = NULL;
b734a4ff 2069
9177fa9f
ZJS
2070 for (i = 0; i < n; i++)
2071 if (!v[i]) {
b734a4ff
LP
2072 w = first_word(p, keys[i]);
2073 if (w)
2074 break;
66ebf6c0 2075 }
66ebf6c0 2076
b734a4ff 2077 if (w) {
b734a4ff
LP
2078 size_t l;
2079
2080 l = strcspn(w, NEWLINE);
9177fa9f
ZJS
2081 v[i] = strndup(w, l);
2082 if (!v[i]) {
b734a4ff
LP
2083 r = -ENOMEM;
2084 goto fail;
66ebf6c0 2085 }
b734a4ff 2086
b734a4ff 2087 n_done++;
b734a4ff
LP
2088 if (n_done >= n)
2089 goto done;
2090
2091 p = w + l;
9177fa9f 2092 } else
b734a4ff 2093 p += strcspn(p, NEWLINE);
b734a4ff
LP
2094
2095 p += strspn(p, NEWLINE);
66ebf6c0
TH
2096 }
2097
b734a4ff
LP
2098 r = -ENXIO;
2099
2100fail:
2101 for (i = 0; i < n; i++)
2102 free(v[i]);
2103
2104 return r;
2105
2106done:
2107 memcpy(ret_values, v, sizeof(char*) * n);
66ebf6c0 2108 return 0;
b734a4ff 2109
66ebf6c0
TH
2110}
2111
efdb0237
LP
2112int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2113 CGroupController c;
e353faa0 2114 CGroupMask done;
65be7e06 2115 bool created;
415fc41c 2116 int r;
4ad49000
LP
2117
2118 /* This one will create a cgroup in our private tree, but also
2119 * duplicate it in the trees specified in mask, and remove it
65be7e06
ZJS
2120 * in all others.
2121 *
2122 * Returns 0 if the group already existed in the systemd hierarchy,
2123 * 1 on success, negative otherwise.
2124 */
4ad49000
LP
2125
2126 /* First create the cgroup in our own hierarchy. */
2127 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2128 if (r < 0)
2129 return r;
490c5a37 2130 created = r;
4ad49000 2131
efdb0237 2132 /* If we are in the unified hierarchy, we are done now */
b4cccbc1
LP
2133 r = cg_all_unified();
2134 if (r < 0)
2135 return r;
2136 if (r > 0)
65be7e06 2137 return created;
efdb0237 2138
e353faa0
LP
2139 supported &= CGROUP_MASK_V1;
2140 mask = CGROUP_MASK_EXTEND_JOINED(mask);
2141 done = 0;
2142
efdb0237
LP
2143 /* Otherwise, do the same in the other hierarchies */
2144 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2145 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2146 const char *n;
2147
e353faa0 2148 if (!FLAGS_SET(supported, bit))
ab275f23
LP
2149 continue;
2150
e353faa0
LP
2151 if (FLAGS_SET(done, bit))
2152 continue;
efdb0237 2153
e353faa0 2154 n = cgroup_controller_to_string(c);
f99850a0 2155 if (FLAGS_SET(mask, bit))
efdb0237 2156 (void) cg_create(n, path);
e353faa0 2157 else
efdb0237 2158 (void) cg_trim(n, path, true);
e353faa0
LP
2159
2160 done |= CGROUP_MASK_EXTEND_JOINED(bit);
4ad49000
LP
2161 }
2162
65be7e06 2163 return created;
4ad49000
LP
2164}
2165
efdb0237
LP
2166int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2167 CGroupController c;
e353faa0 2168 CGroupMask done;
415fc41c 2169 int r;
4ad49000
LP
2170
2171 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
13b84ec7
LP
2172 if (r < 0)
2173 return r;
4ad49000 2174
b4cccbc1
LP
2175 r = cg_all_unified();
2176 if (r < 0)
2177 return r;
2178 if (r > 0)
efdb0237 2179 return 0;
7b3fd631 2180
e353faa0
LP
2181 supported &= CGROUP_MASK_V1;
2182 done = 0;
2183
efdb0237
LP
2184 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2185 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2186 const char *p = NULL;
7b3fd631 2187
e353faa0 2188 if (!FLAGS_SET(supported, bit))
ab275f23
LP
2189 continue;
2190
e353faa0 2191 if (FLAGS_SET(done, bit))
efdb0237 2192 continue;
7b3fd631 2193
efdb0237
LP
2194 if (path_callback)
2195 p = path_callback(bit, userdata);
efdb0237
LP
2196 if (!p)
2197 p = path;
4ad49000 2198
efdb0237 2199 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
e353faa0 2200 done |= CGROUP_MASK_EXTEND_JOINED(bit);
4ad49000
LP
2201 }
2202
13b84ec7 2203 return 0;
4ad49000
LP
2204}
2205
efdb0237 2206int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
6c12b52e
LP
2207 Iterator i;
2208 void *pidp;
2209 int r = 0;
2210
2211 SET_FOREACH(pidp, pids, i) {
fea72cc0 2212 pid_t pid = PTR_TO_PID(pidp);
13b84ec7 2213 int q;
6c12b52e 2214
7b3fd631 2215 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
efdb0237 2216 if (q < 0 && r >= 0)
13b84ec7 2217 r = q;
6c12b52e
LP
2218 }
2219
2220 return r;
2221}
2222
efdb0237 2223int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
b3c5bad3 2224 CGroupController c;
e353faa0 2225 CGroupMask done;
b4cccbc1 2226 int r = 0, q;
4ad49000 2227
13b84ec7 2228 if (!path_equal(from, to)) {
1d98fef1 2229 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
13b84ec7
LP
2230 if (r < 0)
2231 return r;
2232 }
4ad49000 2233
b4cccbc1
LP
2234 q = cg_all_unified();
2235 if (q < 0)
2236 return q;
2237 if (q > 0)
efdb0237 2238 return r;
03b90d4b 2239
e353faa0
LP
2240 supported &= CGROUP_MASK_V1;
2241 done = 0;
2242
efdb0237
LP
2243 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2244 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2245 const char *p = NULL;
03b90d4b 2246
e353faa0 2247 if (!FLAGS_SET(supported, bit))
ab275f23
LP
2248 continue;
2249
e353faa0 2250 if (FLAGS_SET(done, bit))
efdb0237 2251 continue;
03b90d4b 2252
efdb0237
LP
2253 if (to_callback)
2254 p = to_callback(bit, userdata);
efdb0237
LP
2255 if (!p)
2256 p = to;
2257
1d98fef1 2258 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
e353faa0 2259 done |= CGROUP_MASK_EXTEND_JOINED(bit);
4ad49000
LP
2260 }
2261
e353faa0 2262 return r;
4ad49000
LP
2263}
2264
efdb0237
LP
2265int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2266 CGroupController c;
e353faa0 2267 CGroupMask done;
b4cccbc1 2268 int r, q;
4ad49000
LP
2269
2270 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2271 if (r < 0)
2272 return r;
2273
b4cccbc1
LP
2274 q = cg_all_unified();
2275 if (q < 0)
2276 return q;
2277 if (q > 0)
efdb0237
LP
2278 return r;
2279
e353faa0
LP
2280 supported &= CGROUP_MASK_V1;
2281 done = 0;
2282
efdb0237
LP
2283 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2284 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2285
e353faa0 2286 if (!FLAGS_SET(supported, bit))
ab275f23
LP
2287 continue;
2288
e353faa0 2289 if (FLAGS_SET(done, bit))
efdb0237 2290 continue;
4ad49000 2291
efdb0237 2292 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
e353faa0 2293 done |= CGROUP_MASK_EXTEND_JOINED(bit);
4ad49000
LP
2294 }
2295
e353faa0 2296 return r;
4ad49000
LP
2297}
2298
aae7e17f 2299int cg_mask_to_string(CGroupMask mask, char **ret) {
ec635a2d
LP
2300 _cleanup_free_ char *s = NULL;
2301 size_t n = 0, allocated = 0;
2302 bool space = false;
aae7e17f 2303 CGroupController c;
aae7e17f
FB
2304
2305 assert(ret);
2306
2307 if (mask == 0) {
2308 *ret = NULL;
2309 return 0;
2310 }
2311
2312 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
ec635a2d
LP
2313 const char *k;
2314 size_t l;
aae7e17f 2315
f99850a0 2316 if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
aae7e17f
FB
2317 continue;
2318
ec635a2d
LP
2319 k = cgroup_controller_to_string(c);
2320 l = strlen(k);
2321
2322 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
2323 return -ENOMEM;
2324
2325 if (space)
2326 s[n] = ' ';
2327 memcpy(s + n + space, k, l);
2328 n += space + l;
2329
2330 space = true;
aae7e17f
FB
2331 }
2332
ec635a2d 2333 assert(s);
aae7e17f 2334
ec635a2d 2335 s[n] = 0;
ae2a15bc 2336 *ret = TAKE_PTR(s);
ec635a2d 2337
aae7e17f
FB
2338 return 0;
2339}
2340
38a90d45
LP
2341int cg_mask_from_string(const char *value, CGroupMask *ret) {
2342 CGroupMask m = 0;
2343
2344 assert(ret);
aae7e17f
FB
2345 assert(value);
2346
2347 for (;;) {
2348 _cleanup_free_ char *n = NULL;
2349 CGroupController v;
2350 int r;
2351
2352 r = extract_first_word(&value, &n, NULL, 0);
2353 if (r < 0)
2354 return r;
2355 if (r == 0)
2356 break;
2357
2358 v = cgroup_controller_from_string(n);
2359 if (v < 0)
2360 continue;
2361
38a90d45 2362 m |= CGROUP_CONTROLLER_TO_MASK(v);
aae7e17f 2363 }
38a90d45
LP
2364
2365 *ret = m;
aae7e17f
FB
2366 return 0;
2367}
2368
efdb0237 2369int cg_mask_supported(CGroupMask *ret) {
38a90d45 2370 CGroupMask mask;
415fc41c 2371 int r;
efdb0237 2372
67558d15
LP
2373 /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense of and that
2374 * are actually accessible. Only covers real controllers, i.e. not the CGROUP_CONTROLLER_BPF_xyz
2375 * pseudo-controllers. */
4ad49000 2376
b4cccbc1
LP
2377 r = cg_all_unified();
2378 if (r < 0)
2379 return r;
2380 if (r > 0) {
5f4c5fef 2381 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
efdb0237
LP
2382
2383 /* In the unified hierarchy we can read the supported
2384 * and accessible controllers from a the top-level
2385 * cgroup attribute */
2386
5f4c5fef
LP
2387 r = cg_get_root_path(&root);
2388 if (r < 0)
2389 return r;
2390
2391 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2392 if (r < 0)
2393 return r;
2394
2395 r = read_one_line_file(path, &controllers);
efdb0237
LP
2396 if (r < 0)
2397 return r;
4ad49000 2398
aae7e17f
FB
2399 r = cg_mask_from_string(controllers, &mask);
2400 if (r < 0)
2401 return r;
efdb0237 2402
03afd780 2403 /* Currently, we support the cpu, memory, io and pids controller in the unified hierarchy, mask
03a7b521 2404 * everything else off. */
03afd780 2405 mask &= CGROUP_MASK_V2;
efdb0237
LP
2406
2407 } else {
2408 CGroupController c;
2409
03afd780 2410 /* In the legacy hierarchy, we check which hierarchies are mounted. */
efdb0237 2411
38a90d45 2412 mask = 0;
efdb0237 2413 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
03afd780 2414 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
efdb0237
LP
2415 const char *n;
2416
03afd780
LP
2417 if (!FLAGS_SET(CGROUP_MASK_V1, bit))
2418 continue;
2419
efdb0237
LP
2420 n = cgroup_controller_to_string(c);
2421 if (controller_is_accessible(n) >= 0)
03afd780 2422 mask |= bit;
efdb0237 2423 }
4ad49000
LP
2424 }
2425
efdb0237
LP
2426 *ret = mask;
2427 return 0;
4ad49000 2428}
b12afc8c 2429
6925a0de
LP
2430int cg_kernel_controllers(Set **ret) {
2431 _cleanup_set_free_free_ Set *controllers = NULL;
b12afc8c 2432 _cleanup_fclose_ FILE *f = NULL;
b12afc8c
LP
2433 int r;
2434
6925a0de 2435 assert(ret);
b12afc8c 2436
f09e86bc
LS
2437 /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
2438 * and controllers that aren't currently accessible (because not mounted). This does not include "name="
2439 * pseudo-controllers. */
e155a0aa 2440
6925a0de
LP
2441 controllers = set_new(&string_hash_ops);
2442 if (!controllers)
2443 return -ENOMEM;
2444
b12afc8c
LP
2445 f = fopen("/proc/cgroups", "re");
2446 if (!f) {
6925a0de
LP
2447 if (errno == ENOENT) {
2448 *ret = NULL;
b12afc8c 2449 return 0;
6925a0de
LP
2450 }
2451
b12afc8c
LP
2452 return -errno;
2453 }
2454
35bbbf85
LP
2455 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
2456
b12afc8c 2457 /* Ignore the header line */
2351e44d 2458 (void) read_line(f, (size_t) -1, NULL);
b12afc8c
LP
2459
2460 for (;;) {
2461 char *controller;
2462 int enabled = 0;
2463
2464 errno = 0;
2465 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2466
2467 if (feof(f))
2468 break;
2469
b3267152 2470 if (ferror(f) && errno > 0)
b12afc8c
LP
2471 return -errno;
2472
2473 return -EBADMSG;
2474 }
2475
2476 if (!enabled) {
2477 free(controller);
2478 continue;
2479 }
2480
efdb0237 2481 if (!cg_controller_is_valid(controller)) {
b12afc8c
LP
2482 free(controller);
2483 return -EBADMSG;
2484 }
2485
2486 r = set_consume(controllers, controller);
2487 if (r < 0)
2488 return r;
2489 }
2490
1cc6c93a 2491 *ret = TAKE_PTR(controllers);
6925a0de 2492
b12afc8c
LP
2493 return 0;
2494}
efdb0237 2495
5da38d07
TH
/* Per-thread cache of the detected cgroup hierarchy layout. Starts out as CGROUP_UNIFIED_UNKNOWN, is filled in
 * by cg_unified_update() and reset to CGROUP_UNIFIED_UNKNOWN again by cg_unified_flush(). */
2496static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2497
4e1dfa45 2498/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on /sys/fs/cgroup/systemd. This
c22800e4
LP
2499 * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
2500 * /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts v2 on /sys/fs/cgroup/unified and maintains
2501 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
f08e9287 2502 *
c22800e4
LP
2503 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep cgroup v2
2504 * process management but disable the compat dual layout, we return %true on
2505 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
f08e9287
TH
2506 */
/* Set by cg_unified_update(): true if cgroup2 was found mounted directly on /sys/fs/cgroup/systemd, false if it
 * was found on /sys/fs/cgroup/unified. */
2507static thread_local bool unified_systemd_v232;
2508
1fcca10e 2509static int cg_unified_update(void) {
efdb0237 2510
efdb0237
LP
2511 struct statfs fs;
2512
2513 /* Checks if we support the unified hierarchy. Returns an
2514 * error when the cgroup hierarchies aren't mounted yet or we
2515 * have any other trouble determining if the unified hierarchy
2516 * is supported. */
2517
5da38d07
TH
2518 if (unified_cache >= CGROUP_UNIFIED_NONE)
2519 return 0;
efdb0237
LP
2520
2521 if (statfs("/sys/fs/cgroup/", &fs) < 0)
c028bed1 2522 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
efdb0237 2523
9aa21133
ZJS
2524 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2525 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
5da38d07 2526 unified_cache = CGROUP_UNIFIED_ALL;
9aa21133 2527 } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2977724b 2528 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
f08e9287 2529 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
9aa21133 2530 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2977724b 2531 unified_cache = CGROUP_UNIFIED_SYSTEMD;
f08e9287 2532 unified_systemd_v232 = false;
f08e9287 2533 } else {
2977724b 2534 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
9aa21133 2535 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
5535d8f7
EV
2536
2537 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2538 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2539 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2540 unified_systemd_v232 = true;
2541 } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2542 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2543 unified_cache = CGROUP_UNIFIED_NONE;
2544 } else {
2545 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
9aa21133 2546 (unsigned long long) fs.f_type);
5535d8f7 2547 unified_cache = CGROUP_UNIFIED_NONE;
9aa21133 2548 }
2977724b 2549 }
baaa35ad
ZJS
2550 } else
2551 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
2552 "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2553 (unsigned long long)fs.f_type);
efdb0237 2554
5da38d07
TH
2555 return 0;
2556}
2557
c22800e4 2558int cg_unified_controller(const char *controller) {
b4cccbc1 2559 int r;
5da38d07 2560
1fcca10e 2561 r = cg_unified_update();
b4cccbc1
LP
2562 if (r < 0)
2563 return r;
5da38d07 2564
fc9ae717
LP
2565 if (unified_cache == CGROUP_UNIFIED_NONE)
2566 return false;
2567
2568 if (unified_cache >= CGROUP_UNIFIED_ALL)
2569 return true;
2570
2571 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
5da38d07
TH
2572}
2573
b4cccbc1 2574int cg_all_unified(void) {
4bb652ac
LP
2575 int r;
2576
2577 r = cg_unified_update();
2578 if (r < 0)
2579 return r;
2580
2581 return unified_cache >= CGROUP_UNIFIED_ALL;
efdb0237
LP
2582}
2583
b4cccbc1
LP
2584int cg_hybrid_unified(void) {
2585 int r;
2977724b 2586
1fcca10e 2587 r = cg_unified_update();
b4cccbc1
LP
2588 if (r < 0)
2589 return r;
2977724b 2590
f08e9287 2591 return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2977724b
TH
2592}
2593
415fc41c 2594int cg_unified_flush(void) {
5da38d07 2595 unified_cache = CGROUP_UNIFIED_UNKNOWN;
415fc41c 2596
1fcca10e 2597 return cg_unified_update();
efdb0237
LP
2598}
2599
27adcc97
LP
2600int cg_enable_everywhere(
2601 CGroupMask supported,
2602 CGroupMask mask,
2603 const char *p,
2604 CGroupMask *ret_result_mask) {
2605
77fa610b 2606 _cleanup_fclose_ FILE *f = NULL;
efdb0237
LP
2607 _cleanup_free_ char *fs = NULL;
2608 CGroupController c;
27adcc97 2609 CGroupMask ret = 0;
415fc41c 2610 int r;
efdb0237
LP
2611
2612 assert(p);
2613
27adcc97
LP
2614 if (supported == 0) {
2615 if (ret_result_mask)
2616 *ret_result_mask = 0;
efdb0237 2617 return 0;
27adcc97 2618 }
efdb0237 2619
b4cccbc1
LP
2620 r = cg_all_unified();
2621 if (r < 0)
2622 return r;
27adcc97
LP
2623 if (r == 0) {
2624 /* On the legacy hiearchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
2625 * complete success right away. (If you wonder why we return the full mask here, rather than zero: the
2626 * caller tends to use the returned mask later on to compare if all controllers where properly joined,
2627 * and if not requeues realization. This use is the primary purpose of the return value, hence let's
2628 * minimize surprises here and reduce triggers for re-realization by always saying we fully
2629 * succeeded.) */
2630 if (ret_result_mask)
2631 *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
2632 * CGROUP_MASK_V2: The 'supported' mask
2633 * might contain pure-V1 or BPF
2634 * controllers, and we never want to
2635 * claim that we could enable those with
2636 * cgroup.subtree_control */
efdb0237 2637 return 0;
27adcc97 2638 }
efdb0237
LP
2639
2640 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2641 if (r < 0)
2642 return r;
2643
2644 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2645 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2646 const char *n;
2647
ab275f23
LP
2648 if (!FLAGS_SET(CGROUP_MASK_V2, bit))
2649 continue;
2650
f99850a0 2651 if (!FLAGS_SET(supported, bit))
efdb0237
LP
2652 continue;
2653
2654 n = cgroup_controller_to_string(c);
2655 {
2656 char s[1 + strlen(n) + 1];
2657
f99850a0 2658 s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
efdb0237
LP
2659 strcpy(s + 1, n);
2660
77fa610b
LP
2661 if (!f) {
2662 f = fopen(fs, "we");
54b5ba1d
LP
2663 if (!f)
2664 return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
77fa610b
LP
2665 }
2666
604028de 2667 r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
96aa6591 2668 if (r < 0) {
94f344fb
LP
2669 log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
2670 FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
96aa6591 2671 clearerr(f);
27adcc97
LP
2672
2673 /* If we can't turn off a controller, leave it on in the reported resulting mask. This
2674 * happens for example when we attempt to turn off a controller up in the tree that is
2675 * used down in the tree. */
2676 if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
2677 * only here, and not follow the same logic
2678 * for other errors such as EINVAL or
2679 * EOPNOTSUPP or anything else. That's
2680 * because EBUSY indicates that the
2681 * controllers is currently enabled and
2682 * cannot be disabled because something down
2683 * the hierarchy is still using it. Any other
2684 * error most likely means something like "I
2685 * never heard of this controller" or
2686 * similar. In the former case it's hence
2687 * safe to assume the controller is still on
2688 * after the failed operation, while in the
2689 * latter case it's safer to assume the
2690 * controller is unknown and hence certainly
2691 * not enabled. */
2692 ret |= bit;
2693 } else {
2694 /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
2695 if (FLAGS_SET(mask, bit))
2696 ret |= bit;
96aa6591 2697 }
efdb0237
LP
2698 }
2699 }
2700
27adcc97
LP
2701 /* Let's return the precise set of controllers now enabled for the cgroup. */
2702 if (ret_result_mask)
2703 *ret_result_mask = ret;
2704
efdb0237
LP
2705 return 0;
2706}
2707
2708bool cg_is_unified_wanted(void) {
2709 static thread_local int wanted = -1;
415fc41c 2710 int r;
1d84ad94 2711 bool b;
77fab2a9 2712 const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
5f086dc7 2713 _cleanup_free_ char *c = NULL;
efdb0237 2714
77fab2a9 2715 /* If we have a cached value, return that. */
efdb0237
LP
2716 if (wanted >= 0)
2717 return wanted;
2718
239a3d09
ZJS
2719 /* If the hierarchy is already mounted, then follow whatever
2720 * was chosen for it. */
2721 if (cg_unified_flush() >= 0)
b4cccbc1 2722 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
239a3d09 2723
5f086dc7
CD
2724 /* If we were explicitly passed systemd.unified_cgroup_hierarchy,
2725 * respect that. */
1d84ad94 2726 r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
5f086dc7
CD
2727 if (r > 0)
2728 return (wanted = b);
2729
2730 /* If we passed cgroup_no_v1=all with no other instructions, it seems
2731 * highly unlikely that we want to use hybrid or legacy hierarchy. */
2732 r = proc_cmdline_get_key("cgroup_no_v1", 0, &c);
2733 if (r > 0 && streq_ptr(c, "all"))
2734 return (wanted = true);
efdb0237 2735
5f086dc7 2736 return (wanted = is_default);
efdb0237
LP
2737}
2738
2739bool cg_is_legacy_wanted(void) {
239a3d09
ZJS
2740 static thread_local int wanted = -1;
2741
2742 /* If we have a cached value, return that. */
2743 if (wanted >= 0)
2744 return wanted;
2745
4e1dfa45 2746 /* Check if we have cgroup v2 already mounted. */
1b59cf04
ZJS
2747 if (cg_unified_flush() >= 0 &&
2748 unified_cache == CGROUP_UNIFIED_ALL)
239a3d09 2749 return (wanted = false);
1b59cf04
ZJS
2750
2751 /* Otherwise, assume that at least partial legacy is wanted,
4e1dfa45 2752 * since cgroup v2 should already be mounted at this point. */
239a3d09 2753 return (wanted = true);
efdb0237
LP
2754}
2755
a4464b95 2756bool cg_is_hybrid_wanted(void) {
5da38d07 2757 static thread_local int wanted = -1;
415fc41c 2758 int r;
1d84ad94 2759 bool b;
c19739db
ZJS
2760 const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2761 /* We default to true if the default is "hybrid", obviously,
2762 * but also when the default is "unified", because if we get
2763 * called, it means that unified hierarchy was not mounted. */
5da38d07 2764
77fab2a9 2765 /* If we have a cached value, return that. */
5da38d07
TH
2766 if (wanted >= 0)
2767 return wanted;
2768
239a3d09
ZJS
2769 /* If the hierarchy is already mounted, then follow whatever
2770 * was chosen for it. */
2771 if (cg_unified_flush() >= 0 &&
2772 unified_cache == CGROUP_UNIFIED_ALL)
2773 return (wanted = false);
2774
77fab2a9
ZJS
2775 /* Otherwise, let's see what the kernel command line has to say.
2776 * Since checking is expensive, cache a non-error result. */
1d84ad94 2777 r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
5da38d07 2778
2dcb526d
ZJS
2779 /* The meaning of the kernel option is reversed wrt. to the return value
2780 * of this function, hence the negation. */
77fab2a9 2781 return (wanted = r > 0 ? !b : is_default);
5da38d07
TH
2782}
2783
13c31542
TH
2784int cg_weight_parse(const char *s, uint64_t *ret) {
2785 uint64_t u;
2786 int r;
2787
2788 if (isempty(s)) {
2789 *ret = CGROUP_WEIGHT_INVALID;
2790 return 0;
2791 }
2792
2793 r = safe_atou64(s, &u);
2794 if (r < 0)
2795 return r;
2796
2797 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2798 return -ERANGE;
2799
2800 *ret = u;
2801 return 0;
2802}
2803
9be57249
TH
2804const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2805 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2806 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
ac06a0cf
TH
2807 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2808 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
9be57249
TH
2809};
2810
2811static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2812 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2813 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
ac06a0cf
TH
2814 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2815 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
9be57249
TH
2816};
2817
2818DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2819
d53d9474
LP
2820int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2821 uint64_t u;
2822 int r;
2823
2824 if (isempty(s)) {
2825 *ret = CGROUP_CPU_SHARES_INVALID;
2826 return 0;
2827 }
2828
2829 r = safe_atou64(s, &u);
2830 if (r < 0)
2831 return r;
2832
2833 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2834 return -ERANGE;
2835
2836 *ret = u;
2837 return 0;
2838}
2839
2840int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2841 uint64_t u;
2842 int r;
2843
2844 if (isempty(s)) {
2845 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2846 return 0;
2847 }
2848
2849 r = safe_atou64(s, &u);
2850 if (r < 0)
2851 return r;
2852
2853 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2854 return -ERANGE;
2855
2856 *ret = u;
2857 return 0;
2858}
2859
f0bef277
EV
2860bool is_cgroup_fs(const struct statfs *s) {
2861 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2862 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2863}
2864
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        /* Returns true if the file descriptor refers to a cgroup or cgroup2 file system.
         *
         * The return type is bool, hence no error code can be propagated: a negative errno value would
         * silently be converted to "true". If the file system type cannot be determined, report "not a
         * cgroup file system" instead. */
        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2873
efdb0237
LP
2874static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2875 [CGROUP_CONTROLLER_CPU] = "cpu",
2876 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
13c31542 2877 [CGROUP_CONTROLLER_IO] = "io",
efdb0237
LP
2878 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2879 [CGROUP_CONTROLLER_MEMORY] = "memory",
3905f127 2880 [CGROUP_CONTROLLER_DEVICES] = "devices",
03a7b521 2881 [CGROUP_CONTROLLER_PIDS] = "pids",
17f14955 2882 [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
084c7007 2883 [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
efdb0237
LP
2884};
2885
2886DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
f98c2585
CD
2887
2888CGroupMask get_cpu_accounting_mask(void) {
2889 static CGroupMask needed_mask = (CGroupMask) -1;
2890
2891 /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
2892 * provided externally from the CPU controller, which means we don't
2893 * need to enable the CPU controller just to get metrics. This is good,
2894 * because enabling the CPU controller comes at a minor performance
2895 * hit, especially when it's propagated deep into large hierarchies.
2896 * There's also no separate CPU accounting controller available within
2897 * a unified hierarchy.
2898 *
2899 * This combination of factors results in the desired cgroup mask to
2900 * enable for CPU accounting varying as follows:
2901 *
2902 * ╔═════════════════════╤═════════════════════╗
2903 * ║ Linux ≥4.15 │ Linux <4.15 ║
2904 * ╔═══════════════╬═════════════════════╪═════════════════════╣
2905 * ║ Unified ║ nothing │ CGROUP_MASK_CPU ║
2906 * ╟───────────────╫─────────────────────┼─────────────────────╢
2907 * ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
2908 * ╚═══════════════╩═════════════════════╧═════════════════════╝
2909 *
2910 * We check kernel version here instead of manually checking whether
2911 * cpu.stat is present for every cgroup, as that check in itself would
2912 * already be fairly expensive.
2913 *
2914 * Kernels where this patch has been backported will therefore have the
2915 * CPU controller enabled unnecessarily. This is more expensive than
2916 * necessary, but harmless. ☺️
2917 */
2918
2919 if (needed_mask == (CGroupMask) -1) {
2920 if (cg_all_unified()) {
2921 struct utsname u;
2922 assert_se(uname(&u) >= 0);
2923
2924 if (str_verscmp(u.release, "4.15") < 0)
2925 needed_mask = CGROUP_MASK_CPU;
2926 else
2927 needed_mask = 0;
2928 } else
2929 needed_mask = CGROUP_MASK_CPUACCT;
2930 }
2931
2932 return needed_mask;
2933}
2934
2935bool cpu_accounting_is_cheap(void) {
2936 return get_cpu_accounting_mask() == 0;
2937}