]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/cgroup-util.c
Make fopen_temporary and fopen_temporary_label unlocked
[thirdparty/systemd.git] / src / basic / cgroup-util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
8c6db833 2
84ac7bea 3#include <dirent.h>
8c6db833 4#include <errno.h>
84ac7bea 5#include <ftw.h>
11c3a366 6#include <limits.h>
8c6db833 7#include <signal.h>
11c3a366 8#include <stddef.h>
8c6db833 9#include <stdlib.h>
84ac7bea 10#include <string.h>
672c48cc 11#include <sys/stat.h>
11c3a366 12#include <sys/statfs.h>
672c48cc 13#include <sys/types.h>
f98c2585 14#include <sys/utsname.h>
4b58153d 15#include <sys/xattr.h>
84ac7bea 16#include <unistd.h>
8c6db833 17
b5efdb8a 18#include "alloc-util.h"
3ffd4af2 19#include "cgroup-util.h"
93cc7779 20#include "def.h"
a0956174 21#include "dirent-util.h"
84ac7bea 22#include "extract-word.h"
3ffd4af2 23#include "fd-util.h"
84ac7bea 24#include "fileio.h"
f97b34a6 25#include "format-util.h"
f4f15635 26#include "fs-util.h"
93cc7779 27#include "log.h"
84ac7bea
LP
28#include "login-util.h"
29#include "macro.h"
93cc7779 30#include "missing.h"
84ac7bea 31#include "mkdir.h"
6bedfcbb 32#include "parse-util.h"
9eb977db 33#include "path-util.h"
872a590e 34#include "proc-cmdline.h"
84ac7bea
LP
35#include "process-util.h"
36#include "set.h"
9444b1f2 37#include "special.h"
872a590e 38#include "stat-util.h"
d054f0a4 39#include "stdio-util.h"
8b43440b 40#include "string-table.h"
07630cea 41#include "string-util.h"
aae7e17f 42#include "strv.h"
84ac7bea 43#include "unit-name.h"
b1d4f8e1 44#include "user-util.h"
8c6db833 45
c6c18be3 46int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
7027ff61 47 _cleanup_free_ char *fs = NULL;
c6c18be3 48 FILE *f;
7027ff61 49 int r;
c6c18be3 50
c6c18be3
LP
51 assert(_f);
52
c3175a7f
LP
53 r = cg_get_path(controller, path, "cgroup.procs", &fs);
54 if (r < 0)
c6c18be3
LP
55 return r;
56
57 f = fopen(fs, "re");
c6c18be3
LP
58 if (!f)
59 return -errno;
60
61 *_f = f;
62 return 0;
63}
64
c6c18be3
LP
/* Reads the next PID from a stream previously opened on a "cgroup.procs" file.
 *
 * Returns 1 and stores the PID in *_pid if one was read, 0 on end of file, and a negative
 * errno-style value on failure. -EIO is returned if the entry is not a positive number that
 * fits into pid_t. *_pid is only written on success. */
int cg_read_pid(FILE *f, pid_t *_pid) {
        unsigned long ul;
        pid_t pid;

        /* Note that the cgroup.procs might contain duplicates! See
         * cgroups.txt for details. */

        assert(f);
        assert(_pid);

        errno = 0;
        if (fscanf(f, "%lu", &ul) != 1) {

                if (feof(f))
                        return 0;

                /* fscanf() does not necessarily set errno on a matching failure */
                return errno > 0 ? -errno : -EIO;
        }

        if (ul == 0)
                return -EIO;

        /* Refuse values that do not survive the conversion to pid_t, instead of silently
         * truncating them into some unrelated (and possibly valid) PID. */
        pid = (pid_t) ul;
        if (pid <= 0 || (unsigned long) pid != ul)
                return -EIO;

        *_pid = pid;
        return 1;
}
89
8b238b13
LP
90int cg_read_event(
91 const char *controller,
92 const char *path,
93 const char *event,
94 char **val) {
95
ab2c3861
TH
96 _cleanup_free_ char *events = NULL, *content = NULL;
97 char *p, *line;
98 int r;
99
100 r = cg_get_path(controller, path, "cgroup.events", &events);
101 if (r < 0)
102 return r;
103
104 r = read_full_file(events, &content, NULL);
105 if (r < 0)
106 return r;
107
108 p = content;
109 while ((line = strsep(&p, "\n"))) {
110 char *key;
111
112 key = strsep(&line, " ");
113 if (!key || !line)
114 return -EINVAL;
115
116 if (strcmp(key, event))
117 continue;
118
119 *val = strdup(line);
120 return 0;
121 }
122
123 return -ENOENT;
124}
125
3228995c
CB
126bool cg_ns_supported(void) {
127 static thread_local int enabled = -1;
128
129 if (enabled >= 0)
130 return enabled;
131
0887fa71
LP
132 if (access("/proc/self/ns/cgroup", F_OK) < 0) {
133 if (errno != ENOENT)
134 log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
135 enabled = false;
136 } else
137 enabled = true;
3228995c
CB
138
139 return enabled;
140}
141
35d2e7ec 142int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
7027ff61 143 _cleanup_free_ char *fs = NULL;
35d2e7ec
LP
144 int r;
145 DIR *d;
146
35d2e7ec
LP
147 assert(_d);
148
149 /* This is not recursive! */
150
c3175a7f
LP
151 r = cg_get_path(controller, path, NULL, &fs);
152 if (r < 0)
35d2e7ec
LP
153 return r;
154
155 d = opendir(fs);
35d2e7ec
LP
156 if (!d)
157 return -errno;
158
159 *_d = d;
160 return 0;
161}
162
163int cg_read_subgroup(DIR *d, char **fn) {
164 struct dirent *de;
165
166 assert(d);
7027ff61 167 assert(fn);
35d2e7ec 168
f01327ad 169 FOREACH_DIRENT_ALL(de, d, return -errno) {
35d2e7ec
LP
170 char *b;
171
172 if (de->d_type != DT_DIR)
173 continue;
174
49bfc877 175 if (dot_or_dot_dot(de->d_name))
35d2e7ec
LP
176 continue;
177
7027ff61
LP
178 b = strdup(de->d_name);
179 if (!b)
35d2e7ec
LP
180 return -ENOMEM;
181
182 *fn = b;
183 return 1;
184 }
185
35d2e7ec
LP
186 return 0;
187}
188
4ad49000 189int cg_rmdir(const char *controller, const char *path) {
7027ff61 190 _cleanup_free_ char *p = NULL;
35d2e7ec
LP
191 int r;
192
ad293f5a
LP
193 r = cg_get_path(controller, path, NULL, &p);
194 if (r < 0)
35d2e7ec
LP
195 return r;
196
197 r = rmdir(p);
7027ff61
LP
198 if (r < 0 && errno != ENOENT)
199 return -errno;
35d2e7ec 200
b4cccbc1 201 r = cg_hybrid_unified();
f20db199 202 if (r <= 0)
b4cccbc1 203 return r;
b4cccbc1
LP
204
205 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
206 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
207 if (r < 0)
208 log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
209 }
210
7027ff61 211 return 0;
35d2e7ec
LP
212}
213
1d98fef1
LP
214int cg_kill(
215 const char *controller,
216 const char *path,
217 int sig,
218 CGroupFlags flags,
219 Set *s,
220 cg_kill_log_func_t log_kill,
221 void *userdata) {
222
7027ff61 223 _cleanup_set_free_ Set *allocated_set = NULL;
35d2e7ec 224 bool done = false;
c53d2d54 225 int r, ret = 0, ret_log_kill = 0;
35d2e7ec 226 pid_t my_pid;
8c6db833 227
8c6db833
LP
228 assert(sig >= 0);
229
0d5b4810
LP
230 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
231 * SIGCONT on SIGKILL. */
232 if (IN_SET(sig, SIGCONT, SIGKILL))
233 flags &= ~CGROUP_SIGCONT;
234
8c6db833
LP
235 /* This goes through the tasks list and kills them all. This
236 * is repeated until no further processes are added to the
237 * tasks list, to properly handle forking processes */
238
7027ff61 239 if (!s) {
d5099efc 240 s = allocated_set = set_new(NULL);
7027ff61 241 if (!s)
ca949c9d 242 return -ENOMEM;
7027ff61 243 }
8c6db833 244
df0ff127 245 my_pid = getpid_cached();
8c6db833
LP
246
247 do {
7027ff61 248 _cleanup_fclose_ FILE *f = NULL;
0b172489 249 pid_t pid = 0;
8c6db833
LP
250 done = true;
251
7027ff61
LP
252 r = cg_enumerate_processes(controller, path, &f);
253 if (r < 0) {
4c633005 254 if (ret >= 0 && r != -ENOENT)
7027ff61 255 return r;
35d2e7ec 256
7027ff61 257 return ret;
35d2e7ec 258 }
c6c18be3
LP
259
260 while ((r = cg_read_pid(f, &pid)) > 0) {
8c6db833 261
1d98fef1 262 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
c6c18be3 263 continue;
8c6db833 264
fea72cc0 265 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
c6c18be3 266 continue;
8c6db833 267
1d98fef1 268 if (log_kill)
c53d2d54 269 ret_log_kill = log_kill(pid, sig, userdata);
1d98fef1 270
8c6db833
LP
271 /* If we haven't killed this process yet, kill
272 * it */
4c633005
LP
273 if (kill(pid, sig) < 0) {
274 if (ret >= 0 && errno != ESRCH)
8c6db833 275 ret = -errno;
6e8314c4 276 } else {
1d98fef1 277 if (flags & CGROUP_SIGCONT)
e155a0aa 278 (void) kill(pid, SIGCONT);
430c18ed 279
c53d2d54
DB
280 if (ret == 0) {
281 if (log_kill)
282 ret = ret_log_kill;
283 else
284 ret = 1;
285 }
430c18ed 286 }
8c6db833 287
8c6db833
LP
288 done = false;
289
fea72cc0 290 r = set_put(s, PID_TO_PTR(pid));
7027ff61 291 if (r < 0) {
35d2e7ec 292 if (ret >= 0)
7027ff61 293 return r;
35d2e7ec 294
7027ff61 295 return ret;
35d2e7ec
LP
296 }
297 }
298
299 if (r < 0) {
300 if (ret >= 0)
7027ff61 301 return r;
35d2e7ec 302
7027ff61 303 return ret;
8c6db833
LP
304 }
305
8c6db833
LP
306 /* To avoid racing against processes which fork
307 * quicker than we can kill them we repeat this until
308 * no new pids need to be killed. */
309
35d2e7ec 310 } while (!done);
8c6db833 311
35d2e7ec 312 return ret;
8c6db833
LP
313}
314
1d98fef1
LP
315int cg_kill_recursive(
316 const char *controller,
317 const char *path,
318 int sig,
319 CGroupFlags flags,
320 Set *s,
321 cg_kill_log_func_t log_kill,
322 void *userdata) {
323
7027ff61
LP
324 _cleanup_set_free_ Set *allocated_set = NULL;
325 _cleanup_closedir_ DIR *d = NULL;
e155a0aa 326 int r, ret;
35d2e7ec 327 char *fn;
8c6db833
LP
328
329 assert(path);
8c6db833
LP
330 assert(sig >= 0);
331
7027ff61 332 if (!s) {
d5099efc 333 s = allocated_set = set_new(NULL);
7027ff61 334 if (!s)
ca949c9d 335 return -ENOMEM;
7027ff61 336 }
ca949c9d 337
1d98fef1 338 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
8c6db833 339
7027ff61
LP
340 r = cg_enumerate_subgroups(controller, path, &d);
341 if (r < 0) {
4c633005 342 if (ret >= 0 && r != -ENOENT)
7027ff61 343 return r;
8c6db833 344
7027ff61 345 return ret;
35d2e7ec 346 }
8c6db833 347
35d2e7ec 348 while ((r = cg_read_subgroup(d, &fn)) > 0) {
7027ff61 349 _cleanup_free_ char *p = NULL;
8c6db833 350
605405c6 351 p = strjoin(path, "/", fn);
35d2e7ec 352 free(fn);
7027ff61
LP
353 if (!p)
354 return -ENOMEM;
8c6db833 355
1d98fef1 356 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
e155a0aa 357 if (r != 0 && ret >= 0)
35d2e7ec 358 ret = r;
8c6db833 359 }
7027ff61 360 if (ret >= 0 && r < 0)
35d2e7ec
LP
361 ret = r;
362
1d98fef1 363 if (flags & CGROUP_REMOVE) {
4ad49000 364 r = cg_rmdir(controller, path);
4c701096 365 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
7027ff61
LP
366 return r;
367 }
ca949c9d 368
8c6db833
LP
369 return ret;
370}
371
1d98fef1
LP
372int cg_migrate(
373 const char *cfrom,
374 const char *pfrom,
375 const char *cto,
376 const char *pto,
377 CGroupFlags flags) {
378
35d2e7ec 379 bool done = false;
246aa6dd 380 _cleanup_set_free_ Set *s = NULL;
8c6db833
LP
381 int r, ret = 0;
382 pid_t my_pid;
383
246aa6dd
LP
384 assert(cfrom);
385 assert(pfrom);
386 assert(cto);
387 assert(pto);
8c6db833 388
d5099efc 389 s = set_new(NULL);
246aa6dd 390 if (!s)
35d2e7ec
LP
391 return -ENOMEM;
392
df0ff127 393 my_pid = getpid_cached();
8c6db833
LP
394
395 do {
7027ff61 396 _cleanup_fclose_ FILE *f = NULL;
0b172489 397 pid_t pid = 0;
8c6db833
LP
398 done = true;
399
b043cd0b 400 r = cg_enumerate_processes(cfrom, pfrom, &f);
246aa6dd 401 if (r < 0) {
4c633005 402 if (ret >= 0 && r != -ENOENT)
7027ff61 403 return r;
35d2e7ec 404
246aa6dd 405 return ret;
35d2e7ec 406 }
c6c18be3
LP
407
408 while ((r = cg_read_pid(f, &pid)) > 0) {
8c6db833 409
35d2e7ec
LP
410 /* This might do weird stuff if we aren't a
411 * single-threaded program. However, we
412 * luckily know we are not */
1d98fef1 413 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
c6c18be3 414 continue;
8c6db833 415
fea72cc0 416 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
35d2e7ec
LP
417 continue;
418
9b84c7f9
LP
419 /* Ignore kernel threads. Since they can only
420 * exist in the root cgroup, we only check for
421 * them there. */
422 if (cfrom &&
57ea45e1 423 empty_or_root(pfrom) &&
9b84c7f9
LP
424 is_kernel_thread(pid) > 0)
425 continue;
426
246aa6dd
LP
427 r = cg_attach(cto, pto, pid);
428 if (r < 0) {
4c633005 429 if (ret >= 0 && r != -ESRCH)
35d2e7ec
LP
430 ret = r;
431 } else if (ret == 0)
432 ret = 1;
8c6db833 433
8c6db833 434 done = false;
35d2e7ec 435
fea72cc0 436 r = set_put(s, PID_TO_PTR(pid));
246aa6dd 437 if (r < 0) {
35d2e7ec 438 if (ret >= 0)
7027ff61 439 return r;
35d2e7ec 440
246aa6dd 441 return ret;
35d2e7ec
LP
442 }
443 }
444
445 if (r < 0) {
446 if (ret >= 0)
7027ff61 447 return r;
35d2e7ec 448
246aa6dd 449 return ret;
8c6db833 450 }
35d2e7ec 451 } while (!done);
8c6db833 452
35d2e7ec 453 return ret;
8c6db833
LP
454}
455
4ad49000
LP
456int cg_migrate_recursive(
457 const char *cfrom,
458 const char *pfrom,
459 const char *cto,
460 const char *pto,
1d98fef1 461 CGroupFlags flags) {
4ad49000 462
246aa6dd 463 _cleanup_closedir_ DIR *d = NULL;
7027ff61 464 int r, ret = 0;
35d2e7ec 465 char *fn;
8c6db833 466
246aa6dd
LP
467 assert(cfrom);
468 assert(pfrom);
469 assert(cto);
470 assert(pto);
8c6db833 471
1d98fef1 472 ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
8c6db833 473
246aa6dd
LP
474 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
475 if (r < 0) {
4c633005 476 if (ret >= 0 && r != -ENOENT)
7027ff61
LP
477 return r;
478
246aa6dd 479 return ret;
35d2e7ec
LP
480 }
481
482 while ((r = cg_read_subgroup(d, &fn)) > 0) {
246aa6dd 483 _cleanup_free_ char *p = NULL;
8c6db833 484
605405c6 485 p = strjoin(pfrom, "/", fn);
35d2e7ec 486 free(fn);
e155a0aa
LP
487 if (!p)
488 return -ENOMEM;
8c6db833 489
1d98fef1 490 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
35d2e7ec
LP
491 if (r != 0 && ret >= 0)
492 ret = r;
8c6db833
LP
493 }
494
35d2e7ec
LP
495 if (r < 0 && ret >= 0)
496 ret = r;
497
1d98fef1 498 if (flags & CGROUP_REMOVE) {
4ad49000 499 r = cg_rmdir(cfrom, pfrom);
4c701096 500 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
246aa6dd
LP
501 return r;
502 }
8c6db833
LP
503
504 return ret;
505}
506
13b84ec7
LP
507int cg_migrate_recursive_fallback(
508 const char *cfrom,
509 const char *pfrom,
510 const char *cto,
511 const char *pto,
1d98fef1 512 CGroupFlags flags) {
13b84ec7
LP
513
514 int r;
515
516 assert(cfrom);
517 assert(pfrom);
518 assert(cto);
519 assert(pto);
520
1d98fef1 521 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
13b84ec7
LP
522 if (r < 0) {
523 char prefix[strlen(pto) + 1];
524
525 /* This didn't work? Then let's try all prefixes of the destination */
526
fecffe5d 527 PATH_FOREACH_PREFIX(prefix, pto) {
e155a0aa
LP
528 int q;
529
1d98fef1 530 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
e155a0aa
LP
531 if (q >= 0)
532 return q;
13b84ec7
LP
533 }
534 }
535
e155a0aa 536 return r;
13b84ec7
LP
537}
538
efdb0237
LP
539static const char *controller_to_dirname(const char *controller) {
540 const char *e;
3474ae3c 541
7027ff61
LP
542 assert(controller);
543
efdb0237
LP
544 /* Converts a controller name to the directory name below
545 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
546 * just cuts off the name= prefixed used for named
547 * hierarchies, if it is specified. */
548
2977724b 549 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
b4cccbc1 550 if (cg_hybrid_unified() > 0)
2977724b
TH
551 controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
552 else
553 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
554 }
b6629c4b 555
efdb0237
LP
556 e = startswith(controller, "name=");
557 if (e)
558 return e;
559
560 return controller;
3474ae3c
LP
561}
562
569b19d8
LP
563static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
564 const char *dn;
018ef268 565 char *t = NULL;
3474ae3c 566
efdb0237 567 assert(fs);
569b19d8
LP
568 assert(controller);
569
570 dn = controller_to_dirname(controller);
efdb0237
LP
571
572 if (isempty(path) && isempty(suffix))
569b19d8 573 t = strappend("/sys/fs/cgroup/", dn);
efdb0237 574 else if (isempty(path))
605405c6 575 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
efdb0237 576 else if (isempty(suffix))
605405c6 577 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
efdb0237 578 else
605405c6 579 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
efdb0237
LP
580 if (!t)
581 return -ENOMEM;
3474ae3c 582
efdb0237
LP
583 *fs = t;
584 return 0;
585}
586
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]". Empty or NULL components are left out. Returns 0
 * and stores the newly allocated string in *fs, or -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        bool have_path, have_suffix;
        char *joined;

        assert(fs);

        have_path = !isempty(path);
        have_suffix = !isempty(suffix);

        if (have_path && have_suffix)
                joined = strjoin("/sys/fs/cgroup/", path, "/", suffix);
        else if (have_path)
                joined = strappend("/sys/fs/cgroup/", path);
        else if (have_suffix)
                joined = strappend("/sys/fs/cgroup/", suffix);
        else
                joined = strdup("/sys/fs/cgroup");
        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
606
/* Generates the file system path for a cgroup (and optionally an attribute file 'suffix' in it).
 *
 * With a controller, the result is located below /sys/fs/cgroup, laid out for the unified or the
 * legacy setup depending on cg_all_unified(). Without a controller, only 'path' and 'suffix' are
 * joined, without any prefix; at least one of the two must be given in that case.
 *
 * Returns 0 and stores the newly allocated, simplified path in *fs, -EINVAL on invalid
 * arguments, -ENOMEM on allocation failure, or the error returned by cg_all_unified(). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        char *t;
        int r;

        assert(fs);

        if (controller) {
                if (!cg_controller_is_valid(controller))
                        return -EINVAL;

                r = cg_all_unified();
                if (r < 0)
                        return r;

                r = r > 0 ?
                        join_path_unified(path, suffix, fs) :
                        join_path_legacy(controller, path, suffix, fs);
                if (r < 0)
                        return r;

                path_simplify(*fs, false);
                return 0;
        }

        /* If no controller is specified, we return the path
         * *below* the controllers, without any prefix. */

        if (!path && !suffix)
                return -EINVAL;

        if (!suffix)
                t = strdup(path);
        else if (!path)
                t = strdup(suffix);
        else
                t = strjoin(path, "/", suffix);
        if (!t)
                return -ENOMEM;

        *fs = path_simplify(t, false);
        return 0;
}
dbd821ac 650
efdb0237 651static int controller_is_accessible(const char *controller) {
b4cccbc1 652 int r;
37099707 653
efdb0237 654 assert(controller);
37099707 655
efdb0237
LP
656 /* Checks whether a specific controller is accessible,
657 * i.e. its hierarchy mounted. In the unified hierarchy all
658 * controllers are considered accessible, except for the named
659 * hierarchies */
b12afc8c 660
efdb0237
LP
661 if (!cg_controller_is_valid(controller))
662 return -EINVAL;
663
b4cccbc1
LP
664 r = cg_all_unified();
665 if (r < 0)
666 return r;
667 if (r > 0) {
efdb0237
LP
668 /* We don't support named hierarchies if we are using
669 * the unified hierarchy. */
670
671 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
672 return 0;
673
674 if (startswith(controller, "name="))
675 return -EOPNOTSUPP;
676
677 } else {
678 const char *cc, *dn;
679
680 dn = controller_to_dirname(controller);
681 cc = strjoina("/sys/fs/cgroup/", dn);
682
683 if (laccess(cc, F_OK) < 0)
684 return -errno;
685 }
37099707
LP
686
687 return 0;
688}
689
/* Same as cg_get_path(), but first verifies via controller_is_accessible() that the specified
 * controller can actually be used, and fails with that function's error if not. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
703
e27796a0 704static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
4ad49000
LP
705 assert(path);
706 assert(sb);
707 assert(ftwbuf);
e27796a0
LP
708
709 if (typeflag != FTW_DP)
710 return 0;
711
712 if (ftwbuf->level < 1)
713 return 0;
714
e155a0aa 715 (void) rmdir(path);
e27796a0
LP
716 return 0;
717}
718
8c6db833 719int cg_trim(const char *controller, const char *path, bool delete_root) {
7027ff61 720 _cleanup_free_ char *fs = NULL;
2977724b 721 int r = 0, q;
8c6db833 722
8c6db833
LP
723 assert(path);
724
e27796a0
LP
725 r = cg_get_path(controller, path, NULL, &fs);
726 if (r < 0)
8c6db833
LP
727 return r;
728
e27796a0 729 errno = 0;
e155a0aa
LP
730 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
731 if (errno == ENOENT)
732 r = 0;
b3267152 733 else if (errno > 0)
e155a0aa
LP
734 r = -errno;
735 else
736 r = -EIO;
737 }
e27796a0
LP
738
739 if (delete_root) {
4ad49000
LP
740 if (rmdir(fs) < 0 && errno != ENOENT)
741 return -errno;
e27796a0
LP
742 }
743
b4cccbc1
LP
744 q = cg_hybrid_unified();
745 if (q < 0)
746 return q;
747 if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
748 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
749 if (q < 0)
750 log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
751 }
752
e27796a0 753 return r;
8c6db833
LP
754}
755
65be7e06
ZJS
756/* Create a cgroup in the hierarchy of controller.
757 * Returns 0 if the group already existed, 1 on success, negative otherwise.
758 */
1434ae6f
LP
759int cg_create(const char *controller, const char *path) {
760 _cleanup_free_ char *fs = NULL;
761 int r;
762
763 r = cg_get_path_and_check(controller, path, NULL, &fs);
764 if (r < 0)
765 return r;
766
767 r = mkdir_parents(fs, 0755);
768 if (r < 0)
769 return r;
770
dae8b82e
ZJS
771 r = mkdir_errno_wrapper(fs, 0755);
772 if (r == -EEXIST)
773 return 0;
774 if (r < 0)
775 return r;
1434ae6f 776
b4cccbc1
LP
777 r = cg_hybrid_unified();
778 if (r < 0)
779 return r;
780
781 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
782 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
783 if (r < 0)
784 log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
785 }
786
1434ae6f
LP
787 return 1;
788}
789
/* Creates the specified cgroup with cg_create() and then moves 'pid' into it with cg_attach().
 * On success the result of cg_create() is returned (1 if newly created, 0 if it existed
 * already). Note that the cgroup is not removed again if attaching fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);
        if (attached < 0)
                return attached;

        /* This does not remove the cgroup on failure */
        return created;
}
806
8c6db833 807int cg_attach(const char *controller, const char *path, pid_t pid) {
574d5f2d
LP
808 _cleanup_free_ char *fs = NULL;
809 char c[DECIMAL_STR_MAX(pid_t) + 2];
8c6db833
LP
810 int r;
811
8c6db833
LP
812 assert(path);
813 assert(pid >= 0);
814
b043cd0b 815 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
3474ae3c 816 if (r < 0)
c6c18be3 817 return r;
8c6db833
LP
818
819 if (pid == 0)
df0ff127 820 pid = getpid_cached();
8c6db833 821
d054f0a4 822 xsprintf(c, PID_FMT "\n", pid);
8c6db833 823
604028de 824 r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
2977724b
TH
825 if (r < 0)
826 return r;
827
b4cccbc1
LP
828 r = cg_hybrid_unified();
829 if (r < 0)
830 return r;
831
832 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
833 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
834 if (r < 0)
bd68e99b 835 log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
2977724b
TH
836 }
837
838 return 0;
8c6db833
LP
839}
840
13b84ec7
LP
841int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
842 int r;
843
844 assert(controller);
845 assert(path);
846 assert(pid >= 0);
847
848 r = cg_attach(controller, path, pid);
849 if (r < 0) {
850 char prefix[strlen(path) + 1];
851
852 /* This didn't work? Then let's try all prefixes of
853 * the destination */
854
fecffe5d 855 PATH_FOREACH_PREFIX(prefix, path) {
e155a0aa
LP
856 int q;
857
858 q = cg_attach(controller, prefix, pid);
859 if (q >= 0)
860 return q;
13b84ec7
LP
861 }
862 }
863
e155a0aa 864 return r;
13b84ec7
LP
865}
866
62b9bb26 867int cg_set_access(
2d76d14e
LP
868 const char *controller,
869 const char *path,
2d76d14e
LP
870 uid_t uid,
871 gid_t gid) {
872
62b9bb26
LP
873 struct Attribute {
874 const char *name;
875 bool fatal;
876 };
877
4e1dfa45 878 /* cgroup v1, aka legacy/non-unified */
62b9bb26
LP
879 static const struct Attribute legacy_attributes[] = {
880 { "cgroup.procs", true },
881 { "tasks", false },
882 { "cgroup.clone_children", false },
883 {},
884 };
885
4e1dfa45 886 /* cgroup v2, aka unified */
62b9bb26
LP
887 static const struct Attribute unified_attributes[] = {
888 { "cgroup.procs", true },
889 { "cgroup.subtree_control", true },
890 { "cgroup.threads", false },
891 {},
892 };
893
894 static const struct Attribute* const attributes[] = {
895 [false] = legacy_attributes,
896 [true] = unified_attributes,
897 };
974efc46 898
40853aa5 899 _cleanup_free_ char *fs = NULL;
62b9bb26
LP
900 const struct Attribute *i;
901 int r, unified;
8c6db833 902
8c6db833
LP
903 assert(path);
904
62b9bb26 905 if (uid == UID_INVALID && gid == GID_INVALID)
8d53b453
LP
906 return 0;
907
62b9bb26
LP
908 unified = cg_unified_controller(controller);
909 if (unified < 0)
910 return unified;
8c6db833 911
62b9bb26
LP
912 /* Configure access to the cgroup itself */
913 r = cg_get_path(controller, path, NULL, &fs);
974efc46
LP
914 if (r < 0)
915 return r;
8c6db833 916
62b9bb26 917 r = chmod_and_chown(fs, 0755, uid, gid);
b4cccbc1
LP
918 if (r < 0)
919 return r;
40853aa5 920
62b9bb26
LP
921 /* Configure access to the cgroup's attributes */
922 for (i = attributes[unified]; i->name; i++) {
40853aa5 923 fs = mfree(fs);
40853aa5 924
62b9bb26 925 r = cg_get_path(controller, path, i->name, &fs);
40853aa5
LP
926 if (r < 0)
927 return r;
efdb0237 928
62b9bb26
LP
929 r = chmod_and_chown(fs, 0644, uid, gid);
930 if (r < 0) {
931 if (i->fatal)
932 return r;
5beac75e 933
62b9bb26
LP
934 log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
935 }
936 }
937
938 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
939 r = cg_hybrid_unified();
2977724b 940 if (r < 0)
62b9bb26
LP
941 return r;
942 if (r > 0) {
943 /* Always propagate access mode from unified to legacy controller */
944 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
945 if (r < 0)
946 log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
947 }
2977724b 948 }
974efc46 949
efdb0237 950 return 0;
8c6db833
LP
951}
952
4b58153d
LP
953int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
954 _cleanup_free_ char *fs = NULL;
955 int r;
956
957 assert(path);
958 assert(name);
959 assert(value || size <= 0);
960
961 r = cg_get_path(controller, path, NULL, &fs);
962 if (r < 0)
963 return r;
964
965 if (setxattr(fs, name, value, size, flags) < 0)
966 return -errno;
967
968 return 0;
969}
970
971int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
972 _cleanup_free_ char *fs = NULL;
973 ssize_t n;
974 int r;
975
976 assert(path);
977 assert(name);
978
979 r = cg_get_path(controller, path, NULL, &fs);
980 if (r < 0)
981 return r;
982
983 n = getxattr(fs, name, value, size);
984 if (n < 0)
985 return -errno;
986
987 return (int) n;
988}
989
7027ff61 990int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
7027ff61 991 _cleanup_fclose_ FILE *f = NULL;
b6629c4b 992 const char *fs, *controller_str;
d2b39cb6 993 int unified, r;
efdb0237 994 size_t cs = 0;
8c6db833 995
8c6db833 996 assert(path);
c6c18be3 997 assert(pid >= 0);
8c6db833 998
5da38d07
TH
999 if (controller) {
1000 if (!cg_controller_is_valid(controller))
1001 return -EINVAL;
1002 } else
1003 controller = SYSTEMD_CGROUP_CONTROLLER;
1004
c22800e4 1005 unified = cg_unified_controller(controller);
b4cccbc1
LP
1006 if (unified < 0)
1007 return unified;
1008 if (unified == 0) {
b6629c4b
TH
1009 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
1010 controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
1011 else
1012 controller_str = controller;
1013
1014 cs = strlen(controller_str);
1015 }
7027ff61 1016
b68fa010 1017 fs = procfs_file_alloca(pid, "cgroup");
fdeea3f4
ZJS
1018 r = fopen_unlocked(fs, "re", &f);
1019 if (r == -ENOENT)
1020 return -ESRCH;
1021 if (r < 0)
1022 return r;
35bbbf85 1023
d2b39cb6
LP
1024 for (;;) {
1025 _cleanup_free_ char *line = NULL;
efdb0237 1026 char *e, *p;
c6c18be3 1027
d2b39cb6
LP
1028 r = read_line(f, LONG_LINE_MAX, &line);
1029 if (r < 0)
1030 return r;
1031 if (r == 0)
1032 break;
c6c18be3 1033
efdb0237
LP
1034 if (unified) {
1035 e = startswith(line, "0:");
1036 if (!e)
1037 continue;
c6c18be3 1038
efdb0237
LP
1039 e = strchr(e, ':');
1040 if (!e)
1041 continue;
1042 } else {
1043 char *l;
1044 size_t k;
1045 const char *word, *state;
1046 bool found = false;
1047
1048 l = strchr(line, ':');
1049 if (!l)
1050 continue;
8af8afd6 1051
efdb0237
LP
1052 l++;
1053 e = strchr(l, ':');
1054 if (!e)
1055 continue;
8af8afd6 1056
efdb0237 1057 *e = 0;
00d4b1e6 1058 FOREACH_WORD_SEPARATOR(word, k, l, ",", state)
b6629c4b 1059 if (k == cs && memcmp(word, controller_str, cs) == 0) {
efdb0237
LP
1060 found = true;
1061 break;
1062 }
efdb0237
LP
1063 if (!found)
1064 continue;
8af8afd6
LP
1065 }
1066
8af8afd6 1067 p = strdup(e + 1);
7027ff61
LP
1068 if (!p)
1069 return -ENOMEM;
c6c18be3 1070
5e20b0a4
LP
1071 /* Truncate suffix indicating the process is a zombie */
1072 e = endswith(p, " (deleted)");
1073 if (e)
1074 *e = 0;
1075
c6c18be3 1076 *path = p;
7027ff61 1077 return 0;
c6c18be3
LP
1078 }
1079
1c80e425 1080 return -ENODATA;
8c6db833
LP
1081}
1082
1083int cg_install_release_agent(const char *controller, const char *agent) {
7027ff61 1084 _cleanup_free_ char *fs = NULL, *contents = NULL;
efdb0237 1085 const char *sc;
415fc41c 1086 int r;
8c6db833 1087
8c6db833
LP
1088 assert(agent);
1089
c22800e4 1090 r = cg_unified_controller(controller);
b4cccbc1
LP
1091 if (r < 0)
1092 return r;
1093 if (r > 0) /* doesn't apply to unified hierarchy */
efdb0237
LP
1094 return -EOPNOTSUPP;
1095
7027ff61
LP
1096 r = cg_get_path(controller, NULL, "release_agent", &fs);
1097 if (r < 0)
c6c18be3 1098 return r;
8c6db833 1099
7027ff61
LP
1100 r = read_one_line_file(fs, &contents);
1101 if (r < 0)
1102 return r;
8c6db833
LP
1103
1104 sc = strstrip(contents);
e155a0aa 1105 if (isempty(sc)) {
604028de 1106 r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
574d5f2d 1107 if (r < 0)
7027ff61 1108 return r;
b8725df8 1109 } else if (!path_equal(sc, agent))
7027ff61 1110 return -EEXIST;
8c6db833 1111
0da16248 1112 fs = mfree(fs);
7027ff61
LP
1113 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1114 if (r < 0)
1115 return r;
8c6db833 1116
0da16248 1117 contents = mfree(contents);
7027ff61
LP
1118 r = read_one_line_file(fs, &contents);
1119 if (r < 0)
1120 return r;
8c6db833
LP
1121
1122 sc = strstrip(contents);
8c6db833 1123 if (streq(sc, "0")) {
604028de 1124 r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
7027ff61
LP
1125 if (r < 0)
1126 return r;
c6c18be3 1127
7027ff61
LP
1128 return 1;
1129 }
8c6db833 1130
7027ff61
LP
1131 if (!streq(sc, "1"))
1132 return -EIO;
8c6db833 1133
7027ff61 1134 return 0;
8c6db833
LP
1135}
1136
ad929bcc
KS
1137int cg_uninstall_release_agent(const char *controller) {
1138 _cleanup_free_ char *fs = NULL;
415fc41c 1139 int r;
efdb0237 1140
c22800e4 1141 r = cg_unified_controller(controller);
b4cccbc1
LP
1142 if (r < 0)
1143 return r;
1144 if (r > 0) /* Doesn't apply to unified hierarchy */
efdb0237 1145 return -EOPNOTSUPP;
ad929bcc 1146
ac9ef333
LP
1147 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1148 if (r < 0)
1149 return r;
1150
604028de 1151 r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
ac9ef333
LP
1152 if (r < 0)
1153 return r;
1154
0da16248 1155 fs = mfree(fs);
ac9ef333 1156
ad929bcc
KS
1157 r = cg_get_path(controller, NULL, "release_agent", &fs);
1158 if (r < 0)
1159 return r;
1160
604028de 1161 r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER);
ad929bcc
KS
1162 if (r < 0)
1163 return r;
1164
ac9ef333 1165 return 0;
ad929bcc
KS
1166}
1167
6f883237 1168int cg_is_empty(const char *controller, const char *path) {
7027ff61 1169 _cleanup_fclose_ FILE *f = NULL;
efdb0237 1170 pid_t pid;
7027ff61 1171 int r;
8c6db833 1172
8c6db833
LP
1173 assert(path);
1174
b043cd0b 1175 r = cg_enumerate_processes(controller, path, &f);
6f883237 1176 if (r == -ENOENT)
1bcf3fc6 1177 return true;
c3175a7f 1178 if (r < 0)
6f883237 1179 return r;
8c6db833 1180
6f883237 1181 r = cg_read_pid(f, &pid);
c6c18be3
LP
1182 if (r < 0)
1183 return r;
8c6db833 1184
6f883237 1185 return r == 0;
8c6db833
LP
1186}
1187
6f883237 1188int cg_is_empty_recursive(const char *controller, const char *path) {
415fc41c 1189 int r;
8c6db833 1190
8c6db833
LP
1191 assert(path);
1192
6fd66507 1193 /* The root cgroup is always populated */
57ea45e1 1194 if (controller && empty_or_root(path))
efdb0237 1195 return false;
6fd66507 1196
c22800e4 1197 r = cg_unified_controller(controller);
b4cccbc1
LP
1198 if (r < 0)
1199 return r;
1200 if (r > 0) {
ab2c3861 1201 _cleanup_free_ char *t = NULL;
8c6db833 1202
efdb0237 1203 /* On the unified hierarchy we can check empty state
ab2c3861 1204 * via the "populated" attribute of "cgroup.events". */
8c6db833 1205
ab2c3861 1206 r = cg_read_event(controller, path, "populated", &t);
1bcf3fc6
ZJS
1207 if (r == -ENOENT)
1208 return true;
efdb0237
LP
1209 if (r < 0)
1210 return r;
1211
1212 return streq(t, "0");
1213 } else {
1214 _cleanup_closedir_ DIR *d = NULL;
1215 char *fn;
8c6db833 1216
efdb0237 1217 r = cg_is_empty(controller, path);
35d2e7ec 1218 if (r <= 0)
7027ff61 1219 return r;
35d2e7ec 1220
efdb0237
LP
1221 r = cg_enumerate_subgroups(controller, path, &d);
1222 if (r == -ENOENT)
1bcf3fc6 1223 return true;
efdb0237
LP
1224 if (r < 0)
1225 return r;
35d2e7ec 1226
efdb0237
LP
1227 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1228 _cleanup_free_ char *p = NULL;
1229
605405c6 1230 p = strjoin(path, "/", fn);
efdb0237
LP
1231 free(fn);
1232 if (!p)
1233 return -ENOMEM;
1234
1235 r = cg_is_empty_recursive(controller, p);
1236 if (r <= 0)
1237 return r;
1238 }
1239 if (r < 0)
1240 return r;
1241
1242 return true;
1243 }
35d2e7ec
LP
1244}
1245
1246int cg_split_spec(const char *spec, char **controller, char **path) {
35d2e7ec 1247 char *t = NULL, *u = NULL;
efdb0237 1248 const char *e;
35d2e7ec
LP
1249
1250 assert(spec);
35d2e7ec
LP
1251
1252 if (*spec == '/') {
99be45a4 1253 if (!path_is_normalized(spec))
e884315e 1254 return -EINVAL;
35d2e7ec
LP
1255
1256 if (path) {
246aa6dd
LP
1257 t = strdup(spec);
1258 if (!t)
35d2e7ec
LP
1259 return -ENOMEM;
1260
858d36c1 1261 *path = path_simplify(t, false);
8c6db833
LP
1262 }
1263
35d2e7ec
LP
1264 if (controller)
1265 *controller = NULL;
1266
1267 return 0;
8c6db833
LP
1268 }
1269
246aa6dd
LP
1270 e = strchr(spec, ':');
1271 if (!e) {
185a0874 1272 if (!cg_controller_is_valid(spec))
35d2e7ec
LP
1273 return -EINVAL;
1274
1275 if (controller) {
efdb0237 1276 t = strdup(spec);
246aa6dd 1277 if (!t)
35d2e7ec
LP
1278 return -ENOMEM;
1279
1280 *controller = t;
1281 }
1282
1283 if (path)
1284 *path = NULL;
1285
1286 return 0;
8c6db833
LP
1287 }
1288
efdb0237 1289 t = strndup(spec, e-spec);
e884315e
LP
1290 if (!t)
1291 return -ENOMEM;
185a0874 1292 if (!cg_controller_is_valid(t)) {
e884315e 1293 free(t);
35d2e7ec 1294 return -EINVAL;
246aa6dd
LP
1295 }
1296
efdb0237
LP
1297 if (isempty(e+1))
1298 u = NULL;
1299 else {
baa89da4
LP
1300 u = strdup(e+1);
1301 if (!u) {
1302 free(t);
1303 return -ENOMEM;
1304 }
35d2e7ec 1305
99be45a4 1306 if (!path_is_normalized(u) ||
baa89da4
LP
1307 !path_is_absolute(u)) {
1308 free(t);
1309 free(u);
1310 return -EINVAL;
1311 }
1312
858d36c1 1313 path_simplify(u, false);
baa89da4 1314 }
5954c074 1315
35d2e7ec
LP
1316 if (controller)
1317 *controller = t;
e884315e
LP
1318 else
1319 free(t);
35d2e7ec
LP
1320
1321 if (path)
1322 *path = u;
e884315e
LP
1323 else
1324 free(u);
35d2e7ec
LP
1325
1326 return 0;
8c6db833 1327}
c6c18be3 1328
7027ff61 1329int cg_mangle_path(const char *path, char **result) {
78edb35a
LP
1330 _cleanup_free_ char *c = NULL, *p = NULL;
1331 char *t;
35d2e7ec
LP
1332 int r;
1333
1334 assert(path);
1335 assert(result);
1336
73e231ab 1337 /* First, check if it already is a filesystem path */
7027ff61 1338 if (path_startswith(path, "/sys/fs/cgroup")) {
35d2e7ec 1339
b69d29ce
LP
1340 t = strdup(path);
1341 if (!t)
35d2e7ec
LP
1342 return -ENOMEM;
1343
858d36c1 1344 *result = path_simplify(t, false);
35d2e7ec
LP
1345 return 0;
1346 }
1347
73e231ab 1348 /* Otherwise, treat it as cg spec */
b69d29ce
LP
1349 r = cg_split_spec(path, &c, &p);
1350 if (r < 0)
35d2e7ec
LP
1351 return r;
1352
efdb0237 1353 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
35d2e7ec 1354}
1f73f0f1 1355
7027ff61 1356int cg_get_root_path(char **path) {
9444b1f2 1357 char *p, *e;
7027ff61
LP
1358 int r;
1359
1360 assert(path);
1361
9444b1f2 1362 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
7027ff61
LP
1363 if (r < 0)
1364 return r;
1365
efdb0237
LP
1366 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1367 if (!e)
1368 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1369 if (!e)
1370 e = endswith(p, "/system"); /* even more legacy */
9444b1f2 1371 if (e)
7027ff61
LP
1372 *e = 0;
1373
1f73f0f1
LP
1374 *path = p;
1375 return 0;
1376}
b59e2465 1377
751bc6ac
LP
1378int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1379 _cleanup_free_ char *rt = NULL;
1380 char *p;
ba1261bc
LP
1381 int r;
1382
e9174f29 1383 assert(cgroup);
751bc6ac 1384 assert(shifted);
e9174f29
LP
1385
1386 if (!root) {
1387 /* If the root was specified let's use that, otherwise
1388 * let's determine it from PID 1 */
1389
751bc6ac 1390 r = cg_get_root_path(&rt);
e9174f29
LP
1391 if (r < 0)
1392 return r;
1393
751bc6ac 1394 root = rt;
e9174f29 1395 }
ba1261bc 1396
751bc6ac 1397 p = path_startswith(cgroup, root);
efdb0237 1398 if (p && p > cgroup)
751bc6ac
LP
1399 *shifted = p - 1;
1400 else
1401 *shifted = cgroup;
1402
1403 return 0;
1404}
1405
1406int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1407 _cleanup_free_ char *raw = NULL;
1408 const char *c;
1409 int r;
1410
1411 assert(pid >= 0);
1412 assert(cgroup);
1413
1414 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
7027ff61 1415 if (r < 0)
ba1261bc 1416 return r;
ba1261bc 1417
751bc6ac
LP
1418 r = cg_shift_path(raw, root, &c);
1419 if (r < 0)
1420 return r;
ba1261bc 1421
ae2a15bc
LP
1422 if (c == raw)
1423 *cgroup = TAKE_PTR(raw);
1424 else {
751bc6ac 1425 char *n;
ba1261bc 1426
751bc6ac
LP
1427 n = strdup(c);
1428 if (!n)
ba1261bc 1429 return -ENOMEM;
ba1261bc 1430
751bc6ac
LP
1431 *cgroup = n;
1432 }
ba1261bc
LP
1433
1434 return 0;
1435}
1436
9ed794a3 1437int cg_path_decode_unit(const char *cgroup, char **unit) {
8b0849e9
LP
1438 char *c, *s;
1439 size_t n;
ef1673d1
MT
1440
1441 assert(cgroup);
6c03089c 1442 assert(unit);
ef1673d1 1443
8b0849e9
LP
1444 n = strcspn(cgroup, "/");
1445 if (n < 3)
1446 return -ENXIO;
1447
1448 c = strndupa(cgroup, n);
ae018d9b 1449 c = cg_unescape(c);
ef1673d1 1450
7410616c 1451 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
cfeaa44a 1452 return -ENXIO;
ef1673d1 1453
d7bd3de0 1454 s = strdup(c);
6c03089c
LP
1455 if (!s)
1456 return -ENOMEM;
1457
1458 *unit = s;
ef1673d1
MT
1459 return 0;
1460}
1461
8b0849e9
LP
1462static bool valid_slice_name(const char *p, size_t n) {
1463
1464 if (!p)
1465 return false;
1466
fbd0b64f 1467 if (n < STRLEN("x.slice"))
8b0849e9
LP
1468 return false;
1469
1470 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1471 char buf[n+1], *c;
1472
1473 memcpy(buf, p, n);
1474 buf[n] = 0;
1475
1476 c = cg_unescape(buf);
1477
7410616c 1478 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
8b0849e9
LP
1479 }
1480
1481 return false;
1482}
1483
static const char *skip_slices(const char *p) {
        assert(p);

        /* Skips over all leading slice components. Returns a pointer to the first component that is
         * not a valid slice name (with the slashes in front of it already skipped). */

        for (;;) {
                const char *component;
                size_t len;

                component = p + strspn(p, "/");
                len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                p = component + len;
        }
}
1501
8b0849e9 1502int cg_path_get_unit(const char *path, char **ret) {
6c03089c 1503 const char *e;
8b0849e9
LP
1504 char *unit;
1505 int r;
6c03089c
LP
1506
1507 assert(path);
8b0849e9 1508 assert(ret);
6c03089c 1509
9444b1f2 1510 e = skip_slices(path);
6c03089c 1511
8b0849e9
LP
1512 r = cg_path_decode_unit(e, &unit);
1513 if (r < 0)
1514 return r;
1515
1516 /* We skipped over the slices, don't accept any now */
1517 if (endswith(unit, ".slice")) {
1518 free(unit);
1519 return -ENXIO;
1520 }
1521
1522 *ret = unit;
1523 return 0;
6c03089c
LP
1524}
1525
1526int cg_pid_get_unit(pid_t pid, char **unit) {
7fd1b19b 1527 _cleanup_free_ char *cgroup = NULL;
ba1261bc 1528 int r;
ba1261bc 1529
ef1673d1
MT
1530 assert(unit);
1531
7027ff61 1532 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
ef1673d1
MT
1533 if (r < 0)
1534 return r;
1535
6c03089c
LP
1536 return cg_path_get_unit(cgroup, unit);
1537}
ef1673d1 1538
d4fffc4b
ZJS
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to whatever follows the session scope component (with the
 * slashes after it skipped too), or NULL if p is empty or its first component
 * is not a "session-<id>.scope" name with a valid session id.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        /* 8 and 6 below are the lengths of the "session-" prefix and the ".scope" suffix. */
        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                char buf[n - 8 - 6 + 1];

                /* Copy out just the session id, which sits between prefix and suffix. */
                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (!session_id_valid(buf))
                        return NULL; /* this function returns a pointer, so NULL, not 'false' */

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1575
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the user manager component, or NULL if
 * the first component of p is not "user@<uid>.service" with a parsable UID.
 */
static const char *skip_user_manager(const char *p) {
        size_t len, uid_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < STRLEN("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;

        /* What is left between the "user@" prefix and the ".service" suffix is the UID. */
        uid_len = len - 5 - 8;
        char uid[uid_len + 1];

        memcpy(uid, p + 5, uid_len);
        uid[uid_len] = 0;

        /* Note that user manager services never need unescaping,
         * since they cannot conflict with the kernel's own
         * names, hence we don't need to call cg_unescape()
         * here. */

        if (parse_uid(uid, NULL) < 0)
                return NULL;

        p += len;
        p += strspn(p, "/");

        return p;
}
1613
static const char *skip_user_prefix(const char *path) {
        const char *rest, *after_manager;

        /* Skips everything in front of a user unit: first any slices, then either the user manager
         * service or the session scope. Returns NULL if neither of the latter two is there. */

        assert(path);

        /* Skip slices, if there are any */
        rest = skip_slices(path);

        /* Skip the user manager, if it's in the path now... */
        after_manager = skip_user_manager(rest);

        /* ...alternatively skip the user session if it is in the path */
        return after_manager ? after_manager : skip_session(rest);
}
32081481 1630
329ac4bc
LP
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *inner;

        /* Extracts the user unit name from a cgroup path. Returns -ENXIO if the path carries neither
         * a user manager service nor a session scope prefix. */

        assert(path);
        assert(ret);

        inner = skip_user_prefix(path);
        if (!inner)
                return -ENXIO;

        /* Behind the user prefix things look just like for a system unit, so reuse that parser. */
        return cg_path_get_unit(inner, ret);
}
ba1261bc 1646
ef1673d1 1647int cg_pid_get_user_unit(pid_t pid, char **unit) {
7fd1b19b 1648 _cleanup_free_ char *cgroup = NULL;
6c03089c
LP
1649 int r;
1650
1651 assert(unit);
1652
7027ff61 1653 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
6c03089c
LP
1654 if (r < 0)
1655 return r;
1656
1657 return cg_path_get_user_unit(cgroup, unit);
ba1261bc 1658}
e884315e 1659
7027ff61 1660int cg_path_get_machine_name(const char *path, char **machine) {
efdb0237
LP
1661 _cleanup_free_ char *u = NULL;
1662 const char *sl;
89f7c846 1663 int r;
374ec6ab 1664
89f7c846
LP
1665 r = cg_path_get_unit(path, &u);
1666 if (r < 0)
1667 return r;
7027ff61 1668
efdb0237 1669 sl = strjoina("/run/systemd/machines/unit:", u);
89f7c846 1670 return readlink_malloc(sl, machine);
7027ff61
LP
1671}
1672
1673int cg_pid_get_machine_name(pid_t pid, char **machine) {
7fd1b19b 1674 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1675 int r;
1676
1677 assert(machine);
1678
1679 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1680 if (r < 0)
1681 return r;
1682
1683 return cg_path_get_machine_name(cgroup, machine);
1684}
1685
1686int cg_path_get_session(const char *path, char **session) {
8b0849e9
LP
1687 _cleanup_free_ char *unit = NULL;
1688 char *start, *end;
1689 int r;
7027ff61
LP
1690
1691 assert(path);
7027ff61 1692
8b0849e9
LP
1693 r = cg_path_get_unit(path, &unit);
1694 if (r < 0)
1695 return r;
7027ff61 1696
8b0849e9
LP
1697 start = startswith(unit, "session-");
1698 if (!start)
cfeaa44a 1699 return -ENXIO;
8b0849e9
LP
1700 end = endswith(start, ".scope");
1701 if (!end)
cfeaa44a 1702 return -ENXIO;
8b0849e9
LP
1703
1704 *end = 0;
1705 if (!session_id_valid(start))
cfeaa44a 1706 return -ENXIO;
374ec6ab 1707
af08d2f9 1708 if (session) {
8b0849e9 1709 char *rr;
af08d2f9 1710
8b0849e9
LP
1711 rr = strdup(start);
1712 if (!rr)
af08d2f9
LP
1713 return -ENOMEM;
1714
8b0849e9 1715 *session = rr;
af08d2f9 1716 }
7027ff61 1717
7027ff61
LP
1718 return 0;
1719}
1720
1721int cg_pid_get_session(pid_t pid, char **session) {
7fd1b19b 1722 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1723 int r;
1724
7027ff61
LP
1725 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1726 if (r < 0)
1727 return r;
1728
1729 return cg_path_get_session(cgroup, session);
1730}
1731
ae018d9b 1732int cg_path_get_owner_uid(const char *path, uid_t *uid) {
374ec6ab 1733 _cleanup_free_ char *slice = NULL;
8b0849e9 1734 char *start, *end;
374ec6ab 1735 int r;
ae018d9b
LP
1736
1737 assert(path);
ae018d9b 1738
374ec6ab
LP
1739 r = cg_path_get_slice(path, &slice);
1740 if (r < 0)
1741 return r;
ae018d9b 1742
674eb685
LP
1743 start = startswith(slice, "user-");
1744 if (!start)
cfeaa44a 1745 return -ENXIO;
8b0849e9 1746 end = endswith(start, ".slice");
674eb685 1747 if (!end)
cfeaa44a 1748 return -ENXIO;
ae018d9b 1749
8b0849e9
LP
1750 *end = 0;
1751 if (parse_uid(start, uid) < 0)
cfeaa44a 1752 return -ENXIO;
674eb685 1753
674eb685 1754 return 0;
ae018d9b
LP
1755}
1756
1757int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1758 _cleanup_free_ char *cgroup = NULL;
1759 int r;
1760
ae018d9b
LP
1761 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1762 if (r < 0)
1763 return r;
1764
1765 return cg_path_get_owner_uid(cgroup, uid);
1766}
1767
1021b21b
LP
1768int cg_path_get_slice(const char *p, char **slice) {
1769 const char *e = NULL;
1021b21b
LP
1770
1771 assert(p);
1772 assert(slice);
1773
329ac4bc
LP
1774 /* Finds the right-most slice unit from the beginning, but
1775 * stops before we come to the first non-slice unit. */
1776
1021b21b
LP
1777 for (;;) {
1778 size_t n;
1779
1780 p += strspn(p, "/");
1781
1782 n = strcspn(p, "/");
8b0849e9 1783 if (!valid_slice_name(p, n)) {
1021b21b 1784
8b0849e9
LP
1785 if (!e) {
1786 char *s;
1021b21b 1787
e5d855d3 1788 s = strdup(SPECIAL_ROOT_SLICE);
8b0849e9
LP
1789 if (!s)
1790 return -ENOMEM;
1021b21b 1791
8b0849e9
LP
1792 *slice = s;
1793 return 0;
1794 }
1795
1796 return cg_path_decode_unit(e, slice);
1021b21b
LP
1797 }
1798
1799 e = p;
1021b21b
LP
1800 p += n;
1801 }
1802}
1803
1804int cg_pid_get_slice(pid_t pid, char **slice) {
1805 _cleanup_free_ char *cgroup = NULL;
1806 int r;
1807
1808 assert(slice);
1809
1810 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1811 if (r < 0)
1812 return r;
1813
1814 return cg_path_get_slice(cgroup, slice);
1815}
1816
329ac4bc
LP
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *inner;

        /* Determines the slice of a user unit from a cgroup path. Returns -ENXIO if the path carries
         * neither a user manager service nor a session scope prefix. */

        assert(p);
        assert(slice);

        inner = skip_user_prefix(p);
        if (!inner)
                return -ENXIO;

        /* Behind the user prefix things look just like for a system slice, so reuse that parser. */
        return cg_path_get_slice(inner, slice);
}
1830
1831int cg_pid_get_user_slice(pid_t pid, char **slice) {
1832 _cleanup_free_ char *cgroup = NULL;
1833 int r;
1834
1835 assert(slice);
1836
1837 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1838 if (r < 0)
1839 return r;
1840
1841 return cg_path_get_user_slice(cgroup, slice);
1842}
1843
ae018d9b
LP
1844char *cg_escape(const char *p) {
1845 bool need_prefix = false;
1846
1847 /* This implements very minimal escaping for names to be used
1848 * as file names in the cgroup tree: any name which might
1849 * conflict with a kernel name or is prefixed with '_' is
1850 * prefixed with a '_'. That way, when reading cgroup names it
1851 * is sufficient to remove a single prefixing underscore if
1852 * there is one. */
1853
1854 /* The return value of this function (unlike cg_unescape())
1855 * needs free()! */
1856
4c701096 1857 if (IN_SET(p[0], 0, '_', '.') ||
0cbd293e 1858 STR_IN_SET(p, "notify_on_release", "release_agent", "tasks") ||
efdb0237 1859 startswith(p, "cgroup."))
ae018d9b
LP
1860 need_prefix = true;
1861 else {
1862 const char *dot;
1863
1864 dot = strrchr(p, '.');
1865 if (dot) {
efdb0237
LP
1866 CGroupController c;
1867 size_t l = dot - p;
ae018d9b 1868
efdb0237
LP
1869 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1870 const char *n;
1871
1872 n = cgroup_controller_to_string(c);
ae018d9b 1873
efdb0237
LP
1874 if (l != strlen(n))
1875 continue;
ae018d9b 1876
efdb0237
LP
1877 if (memcmp(p, n, l) != 0)
1878 continue;
1879
1880 need_prefix = true;
1881 break;
ae018d9b
LP
1882 }
1883 }
1884 }
1885
1886 if (need_prefix)
1887 return strappend("_", p);
efdb0237
LP
1888
1889 return strdup(p);
ae018d9b
LP
1890}
1891
char *cg_unescape(const char *p) {
        assert(p);

        /* Reverses cg_escape() by skipping one leading underscore, if there is one. The result points
         * into the string passed in, hence (unlike for cg_escape()) it must not be freed! */

        return (char*) (p[0] == '_' ? p + 1 : p);
}
78edb35a
LP
1903
1904#define CONTROLLER_VALID \
4b549144 1905 DIGITS LETTERS \
78edb35a
LP
1906 "_"
1907
185a0874 1908bool cg_controller_is_valid(const char *p) {
78edb35a
LP
1909 const char *t, *s;
1910
1911 if (!p)
1912 return false;
1913
b6629c4b
TH
1914 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1915 return true;
1916
185a0874
DJL
1917 s = startswith(p, "name=");
1918 if (s)
1919 p = s;
78edb35a 1920
4c701096 1921 if (IN_SET(*p, 0, '_'))
78edb35a
LP
1922 return false;
1923
1924 for (t = p; *t; t++)
1925 if (!strchr(CONTROLLER_VALID, *t))
1926 return false;
1927
1928 if (t - p > FILENAME_MAX)
1929 return false;
1930
1931 return true;
1932}
a016b922
LP
1933
1934int cg_slice_to_path(const char *unit, char **ret) {
1935 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1936 const char *dash;
7410616c 1937 int r;
a016b922
LP
1938
1939 assert(unit);
1940 assert(ret);
1941
e5d855d3 1942 if (streq(unit, SPECIAL_ROOT_SLICE)) {
c96cc582
LP
1943 char *x;
1944
1945 x = strdup("");
1946 if (!x)
1947 return -ENOMEM;
1948 *ret = x;
1949 return 0;
1950 }
1951
7410616c 1952 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
a016b922
LP
1953 return -EINVAL;
1954
1955 if (!endswith(unit, ".slice"))
1956 return -EINVAL;
1957
7410616c
LP
1958 r = unit_name_to_prefix(unit, &p);
1959 if (r < 0)
1960 return r;
a016b922
LP
1961
1962 dash = strchr(p, '-');
e66e5b61
LP
1963
1964 /* Don't allow initial dashes */
1965 if (dash == p)
1966 return -EINVAL;
1967
a016b922
LP
1968 while (dash) {
1969 _cleanup_free_ char *escaped = NULL;
1970 char n[dash - p + sizeof(".slice")];
1971
989290db 1972#if HAS_FEATURE_MEMORY_SANITIZER
1c56d501
ZJS
1973 /* msan doesn't instrument stpncpy, so it thinks
1974 * n is later used unitialized:
1975 * https://github.com/google/sanitizers/issues/926
1976 */
1977 zero(n);
1978#endif
1979
e66e5b61 1980 /* Don't allow trailing or double dashes */
4c701096 1981 if (IN_SET(dash[1], 0, '-'))
c96cc582 1982 return -EINVAL;
a016b922 1983
c96cc582 1984 strcpy(stpncpy(n, p, dash - p), ".slice");
7410616c 1985 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
a016b922
LP
1986 return -EINVAL;
1987
1988 escaped = cg_escape(n);
1989 if (!escaped)
1990 return -ENOMEM;
1991
1992 if (!strextend(&s, escaped, "/", NULL))
1993 return -ENOMEM;
1994
1995 dash = strchr(dash+1, '-');
1996 }
1997
1998 e = cg_escape(unit);
1999 if (!e)
2000 return -ENOMEM;
2001
2002 if (!strextend(&s, e, NULL))
2003 return -ENOMEM;
2004
ae2a15bc 2005 *ret = TAKE_PTR(s);
a016b922
LP
2006
2007 return 0;
2008}
4ad49000
LP
2009
2010int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2011 _cleanup_free_ char *p = NULL;
2012 int r;
2013
2014 r = cg_get_path(controller, path, attribute, &p);
2015 if (r < 0)
2016 return r;
2017
604028de 2018 return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
4ad49000
LP
2019}
2020
934277fe
LP
2021int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2022 _cleanup_free_ char *p = NULL;
2023 int r;
2024
2025 r = cg_get_path(controller, path, attribute, &p);
2026 if (r < 0)
2027 return r;
2028
2029 return read_one_line_file(p, ret);
2030}
2031
b734a4ff
LP
2032int cg_get_keyed_attribute(
2033 const char *controller,
2034 const char *path,
2035 const char *attribute,
2036 char **keys,
2037 char **ret_values) {
66ebf6c0 2038
b734a4ff 2039 _cleanup_free_ char *filename = NULL, *contents = NULL;
b734a4ff 2040 const char *p;
9177fa9f 2041 size_t n, i, n_done = 0;
b734a4ff
LP
2042 char **v;
2043 int r;
2044
4e1dfa45 2045 /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
b734a4ff
LP
2046 * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
2047 * entries as 'keys'. On success each entry will be set to the value of the matching key.
2048 *
2049 * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
66ebf6c0
TH
2050
2051 r = cg_get_path(controller, path, attribute, &filename);
2052 if (r < 0)
2053 return r;
2054
b734a4ff 2055 r = read_full_file(filename, &contents, NULL);
66ebf6c0
TH
2056 if (r < 0)
2057 return r;
2058
b734a4ff
LP
2059 n = strv_length(keys);
2060 if (n == 0) /* No keys to retrieve? That's easy, we are done then */
2061 return 0;
66ebf6c0 2062
b734a4ff
LP
2063 /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
2064 v = newa0(char*, n);
66ebf6c0 2065
b734a4ff
LP
2066 for (p = contents; *p;) {
2067 const char *w = NULL;
b734a4ff 2068
9177fa9f
ZJS
2069 for (i = 0; i < n; i++)
2070 if (!v[i]) {
b734a4ff
LP
2071 w = first_word(p, keys[i]);
2072 if (w)
2073 break;
66ebf6c0 2074 }
66ebf6c0 2075
b734a4ff 2076 if (w) {
b734a4ff
LP
2077 size_t l;
2078
2079 l = strcspn(w, NEWLINE);
9177fa9f
ZJS
2080 v[i] = strndup(w, l);
2081 if (!v[i]) {
b734a4ff
LP
2082 r = -ENOMEM;
2083 goto fail;
66ebf6c0 2084 }
b734a4ff 2085
b734a4ff 2086 n_done++;
b734a4ff
LP
2087 if (n_done >= n)
2088 goto done;
2089
2090 p = w + l;
9177fa9f 2091 } else
b734a4ff 2092 p += strcspn(p, NEWLINE);
b734a4ff
LP
2093
2094 p += strspn(p, NEWLINE);
66ebf6c0
TH
2095 }
2096
b734a4ff
LP
2097 r = -ENXIO;
2098
2099fail:
2100 for (i = 0; i < n; i++)
2101 free(v[i]);
2102
2103 return r;
2104
2105done:
2106 memcpy(ret_values, v, sizeof(char*) * n);
66ebf6c0 2107 return 0;
b734a4ff 2108
66ebf6c0
TH
2109}
2110
efdb0237
LP
2111int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2112 CGroupController c;
e353faa0 2113 CGroupMask done;
65be7e06 2114 bool created;
415fc41c 2115 int r;
4ad49000
LP
2116
2117 /* This one will create a cgroup in our private tree, but also
2118 * duplicate it in the trees specified in mask, and remove it
65be7e06
ZJS
2119 * in all others.
2120 *
2121 * Returns 0 if the group already existed in the systemd hierarchy,
2122 * 1 on success, negative otherwise.
2123 */
4ad49000
LP
2124
2125 /* First create the cgroup in our own hierarchy. */
2126 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2127 if (r < 0)
2128 return r;
490c5a37 2129 created = r;
4ad49000 2130
efdb0237 2131 /* If we are in the unified hierarchy, we are done now */
b4cccbc1
LP
2132 r = cg_all_unified();
2133 if (r < 0)
2134 return r;
2135 if (r > 0)
65be7e06 2136 return created;
efdb0237 2137
e353faa0
LP
2138 supported &= CGROUP_MASK_V1;
2139 mask = CGROUP_MASK_EXTEND_JOINED(mask);
2140 done = 0;
2141
efdb0237
LP
2142 /* Otherwise, do the same in the other hierarchies */
2143 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2144 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2145 const char *n;
2146
e353faa0 2147 if (!FLAGS_SET(supported, bit))
ab275f23
LP
2148 continue;
2149
e353faa0
LP
2150 if (FLAGS_SET(done, bit))
2151 continue;
efdb0237 2152
e353faa0 2153 n = cgroup_controller_to_string(c);
f99850a0 2154 if (FLAGS_SET(mask, bit))
efdb0237 2155 (void) cg_create(n, path);
e353faa0 2156 else
efdb0237 2157 (void) cg_trim(n, path, true);
e353faa0
LP
2158
2159 done |= CGROUP_MASK_EXTEND_JOINED(bit);
4ad49000
LP
2160 }
2161
65be7e06 2162 return created;
4ad49000
LP
2163}
2164
efdb0237
LP
2165int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2166 CGroupController c;
e353faa0 2167 CGroupMask done;
415fc41c 2168 int r;
4ad49000
LP
2169
2170 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
13b84ec7
LP
2171 if (r < 0)
2172 return r;
4ad49000 2173
b4cccbc1
LP
2174 r = cg_all_unified();
2175 if (r < 0)
2176 return r;
2177 if (r > 0)
efdb0237 2178 return 0;
7b3fd631 2179
e353faa0
LP
2180 supported &= CGROUP_MASK_V1;
2181 done = 0;
2182
efdb0237
LP
2183 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2184 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2185 const char *p = NULL;
7b3fd631 2186
e353faa0 2187 if (!FLAGS_SET(supported, bit))
ab275f23
LP
2188 continue;
2189
e353faa0 2190 if (FLAGS_SET(done, bit))
efdb0237 2191 continue;
7b3fd631 2192
efdb0237
LP
2193 if (path_callback)
2194 p = path_callback(bit, userdata);
efdb0237
LP
2195 if (!p)
2196 p = path;
4ad49000 2197
efdb0237 2198 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
e353faa0 2199 done |= CGROUP_MASK_EXTEND_JOINED(bit);
4ad49000
LP
2200 }
2201
13b84ec7 2202 return 0;
4ad49000
LP
2203}
2204
efdb0237 2205int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
6c12b52e
LP
2206 Iterator i;
2207 void *pidp;
2208 int r = 0;
2209
2210 SET_FOREACH(pidp, pids, i) {
fea72cc0 2211 pid_t pid = PTR_TO_PID(pidp);
13b84ec7 2212 int q;
6c12b52e 2213
7b3fd631 2214 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
efdb0237 2215 if (q < 0 && r >= 0)
13b84ec7 2216 r = q;
6c12b52e
LP
2217 }
2218
2219 return r;
2220}
2221
efdb0237 2222int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
b3c5bad3 2223 CGroupController c;
e353faa0 2224 CGroupMask done;
b4cccbc1 2225 int r = 0, q;
4ad49000 2226
13b84ec7 2227 if (!path_equal(from, to)) {
1d98fef1 2228 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
13b84ec7
LP
2229 if (r < 0)
2230 return r;
2231 }
4ad49000 2232
b4cccbc1
LP
2233 q = cg_all_unified();
2234 if (q < 0)
2235 return q;
2236 if (q > 0)
efdb0237 2237 return r;
03b90d4b 2238
e353faa0
LP
2239 supported &= CGROUP_MASK_V1;
2240 done = 0;
2241
efdb0237
LP
2242 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2243 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2244 const char *p = NULL;
03b90d4b 2245
e353faa0 2246 if (!FLAGS_SET(supported, bit))
ab275f23
LP
2247 continue;
2248
e353faa0 2249 if (FLAGS_SET(done, bit))
efdb0237 2250 continue;
03b90d4b 2251
efdb0237
LP
2252 if (to_callback)
2253 p = to_callback(bit, userdata);
efdb0237
LP
2254 if (!p)
2255 p = to;
2256
1d98fef1 2257 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
e353faa0 2258 done |= CGROUP_MASK_EXTEND_JOINED(bit);
4ad49000
LP
2259 }
2260
e353faa0 2261 return r;
4ad49000
LP
2262}
2263
efdb0237
LP
2264int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2265 CGroupController c;
e353faa0 2266 CGroupMask done;
b4cccbc1 2267 int r, q;
4ad49000
LP
2268
2269 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2270 if (r < 0)
2271 return r;
2272
b4cccbc1
LP
2273 q = cg_all_unified();
2274 if (q < 0)
2275 return q;
2276 if (q > 0)
efdb0237
LP
2277 return r;
2278
e353faa0
LP
2279 supported &= CGROUP_MASK_V1;
2280 done = 0;
2281
efdb0237
LP
2282 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2283 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2284
e353faa0 2285 if (!FLAGS_SET(supported, bit))
ab275f23
LP
2286 continue;
2287
e353faa0 2288 if (FLAGS_SET(done, bit))
efdb0237 2289 continue;
4ad49000 2290
efdb0237 2291 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
e353faa0 2292 done |= CGROUP_MASK_EXTEND_JOINED(bit);
4ad49000
LP
2293 }
2294
e353faa0 2295 return r;
4ad49000
LP
2296}
2297
aae7e17f 2298int cg_mask_to_string(CGroupMask mask, char **ret) {
ec635a2d
LP
2299 _cleanup_free_ char *s = NULL;
2300 size_t n = 0, allocated = 0;
2301 bool space = false;
aae7e17f 2302 CGroupController c;
aae7e17f
FB
2303
2304 assert(ret);
2305
2306 if (mask == 0) {
2307 *ret = NULL;
2308 return 0;
2309 }
2310
2311 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
ec635a2d
LP
2312 const char *k;
2313 size_t l;
aae7e17f 2314
f99850a0 2315 if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
aae7e17f
FB
2316 continue;
2317
ec635a2d
LP
2318 k = cgroup_controller_to_string(c);
2319 l = strlen(k);
2320
2321 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
2322 return -ENOMEM;
2323
2324 if (space)
2325 s[n] = ' ';
2326 memcpy(s + n + space, k, l);
2327 n += space + l;
2328
2329 space = true;
aae7e17f
FB
2330 }
2331
ec635a2d 2332 assert(s);
aae7e17f 2333
ec635a2d 2334 s[n] = 0;
ae2a15bc 2335 *ret = TAKE_PTR(s);
ec635a2d 2336
aae7e17f
FB
2337 return 0;
2338}
2339
38a90d45
LP
2340int cg_mask_from_string(const char *value, CGroupMask *ret) {
2341 CGroupMask m = 0;
2342
2343 assert(ret);
aae7e17f
FB
2344 assert(value);
2345
2346 for (;;) {
2347 _cleanup_free_ char *n = NULL;
2348 CGroupController v;
2349 int r;
2350
2351 r = extract_first_word(&value, &n, NULL, 0);
2352 if (r < 0)
2353 return r;
2354 if (r == 0)
2355 break;
2356
2357 v = cgroup_controller_from_string(n);
2358 if (v < 0)
2359 continue;
2360
38a90d45 2361 m |= CGROUP_CONTROLLER_TO_MASK(v);
aae7e17f 2362 }
38a90d45
LP
2363
2364 *ret = m;
aae7e17f
FB
2365 return 0;
2366}
2367
efdb0237 2368int cg_mask_supported(CGroupMask *ret) {
38a90d45 2369 CGroupMask mask;
415fc41c 2370 int r;
efdb0237 2371
67558d15
LP
2372 /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense of and that
2373 * are actually accessible. Only covers real controllers, i.e. not the CGROUP_CONTROLLER_BPF_xyz
2374 * pseudo-controllers. */
4ad49000 2375
b4cccbc1
LP
2376 r = cg_all_unified();
2377 if (r < 0)
2378 return r;
2379 if (r > 0) {
5f4c5fef 2380 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
efdb0237
LP
2381
2382 /* In the unified hierarchy we can read the supported
2383 * and accessible controllers from a the top-level
2384 * cgroup attribute */
2385
5f4c5fef
LP
2386 r = cg_get_root_path(&root);
2387 if (r < 0)
2388 return r;
2389
2390 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2391 if (r < 0)
2392 return r;
2393
2394 r = read_one_line_file(path, &controllers);
efdb0237
LP
2395 if (r < 0)
2396 return r;
4ad49000 2397
aae7e17f
FB
2398 r = cg_mask_from_string(controllers, &mask);
2399 if (r < 0)
2400 return r;
efdb0237 2401
03afd780 2402 /* Currently, we support the cpu, memory, io and pids controller in the unified hierarchy, mask
03a7b521 2403 * everything else off. */
03afd780 2404 mask &= CGROUP_MASK_V2;
efdb0237
LP
2405
2406 } else {
2407 CGroupController c;
2408
03afd780 2409 /* In the legacy hierarchy, we check which hierarchies are mounted. */
efdb0237 2410
38a90d45 2411 mask = 0;
efdb0237 2412 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
03afd780 2413 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
efdb0237
LP
2414 const char *n;
2415
03afd780
LP
2416 if (!FLAGS_SET(CGROUP_MASK_V1, bit))
2417 continue;
2418
efdb0237
LP
2419 n = cgroup_controller_to_string(c);
2420 if (controller_is_accessible(n) >= 0)
03afd780 2421 mask |= bit;
efdb0237 2422 }
4ad49000
LP
2423 }
2424
efdb0237
LP
2425 *ret = mask;
2426 return 0;
4ad49000 2427}
b12afc8c 2428
6925a0de
LP
2429int cg_kernel_controllers(Set **ret) {
2430 _cleanup_set_free_free_ Set *controllers = NULL;
b12afc8c 2431 _cleanup_fclose_ FILE *f = NULL;
b12afc8c
LP
2432 int r;
2433
6925a0de 2434 assert(ret);
b12afc8c 2435
f09e86bc
LS
2436 /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
2437 * and controllers that aren't currently accessible (because not mounted). This does not include "name="
2438 * pseudo-controllers. */
e155a0aa 2439
6925a0de
LP
2440 controllers = set_new(&string_hash_ops);
2441 if (!controllers)
2442 return -ENOMEM;
2443
fdeea3f4
ZJS
2444 r = fopen_unlocked("/proc/cgroups", "re", &f);
2445 if (r == -ENOENT) {
2446 *ret = NULL;
2447 return 0;
b12afc8c 2448 }
fdeea3f4
ZJS
2449 if (r < 0)
2450 return r;
35bbbf85 2451
b12afc8c 2452 /* Ignore the header line */
2351e44d 2453 (void) read_line(f, (size_t) -1, NULL);
b12afc8c
LP
2454
2455 for (;;) {
2456 char *controller;
2457 int enabled = 0;
2458
2459 errno = 0;
2460 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2461
2462 if (feof(f))
2463 break;
2464
b3267152 2465 if (ferror(f) && errno > 0)
b12afc8c
LP
2466 return -errno;
2467
2468 return -EBADMSG;
2469 }
2470
2471 if (!enabled) {
2472 free(controller);
2473 continue;
2474 }
2475
efdb0237 2476 if (!cg_controller_is_valid(controller)) {
b12afc8c
LP
2477 free(controller);
2478 return -EBADMSG;
2479 }
2480
2481 r = set_consume(controllers, controller);
2482 if (r < 0)
2483 return r;
2484 }
2485
1cc6c93a 2486 *ret = TAKE_PTR(controllers);
6925a0de 2487
b12afc8c
LP
2488 return 0;
2489}
efdb0237 2490
5da38d07
TH
2491static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2492
4e1dfa45 2493/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on /sys/fs/cgroup/systemd. This
c22800e4
LP
2494 * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
2495 * /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mountnbs v2 on /sys/fs/cgroup/unified and maintains
2496 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
f08e9287 2497 *
c22800e4
LP
2498 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep cgroup v2
2499 * process management but disable the compat dual layout, we return %true on
2500 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
f08e9287
TH
2501 */
2502static thread_local bool unified_systemd_v232;
2503
1fcca10e 2504static int cg_unified_update(void) {
efdb0237 2505
efdb0237
LP
2506 struct statfs fs;
2507
2508 /* Checks if we support the unified hierarchy. Returns an
2509 * error when the cgroup hierarchies aren't mounted yet or we
2510 * have any other trouble determining if the unified hierarchy
2511 * is supported. */
2512
5da38d07
TH
2513 if (unified_cache >= CGROUP_UNIFIED_NONE)
2514 return 0;
efdb0237
LP
2515
2516 if (statfs("/sys/fs/cgroup/", &fs) < 0)
c028bed1 2517 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
efdb0237 2518
9aa21133
ZJS
2519 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2520 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
5da38d07 2521 unified_cache = CGROUP_UNIFIED_ALL;
9aa21133 2522 } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2977724b 2523 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
f08e9287 2524 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
9aa21133 2525 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2977724b 2526 unified_cache = CGROUP_UNIFIED_SYSTEMD;
f08e9287 2527 unified_systemd_v232 = false;
f08e9287 2528 } else {
2977724b 2529 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
9aa21133 2530 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
5535d8f7
EV
2531
2532 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2533 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2534 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2535 unified_systemd_v232 = true;
2536 } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2537 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2538 unified_cache = CGROUP_UNIFIED_NONE;
2539 } else {
2540 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
9aa21133 2541 (unsigned long long) fs.f_type);
5535d8f7 2542 unified_cache = CGROUP_UNIFIED_NONE;
9aa21133 2543 }
2977724b 2544 }
baaa35ad
ZJS
2545 } else
2546 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
2547 "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2548 (unsigned long long)fs.f_type);
efdb0237 2549
5da38d07
TH
2550 return 0;
2551}
2552
c22800e4 2553int cg_unified_controller(const char *controller) {
b4cccbc1 2554 int r;
5da38d07 2555
1fcca10e 2556 r = cg_unified_update();
b4cccbc1
LP
2557 if (r < 0)
2558 return r;
5da38d07 2559
fc9ae717
LP
2560 if (unified_cache == CGROUP_UNIFIED_NONE)
2561 return false;
2562
2563 if (unified_cache >= CGROUP_UNIFIED_ALL)
2564 return true;
2565
2566 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
5da38d07
TH
2567}
2568
b4cccbc1 2569int cg_all_unified(void) {
4bb652ac
LP
2570 int r;
2571
2572 r = cg_unified_update();
2573 if (r < 0)
2574 return r;
2575
2576 return unified_cache >= CGROUP_UNIFIED_ALL;
efdb0237
LP
2577}
2578
b4cccbc1
LP
2579int cg_hybrid_unified(void) {
2580 int r;
2977724b 2581
1fcca10e 2582 r = cg_unified_update();
b4cccbc1
LP
2583 if (r < 0)
2584 return r;
2977724b 2585
f08e9287 2586 return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2977724b
TH
2587}
2588
415fc41c 2589int cg_unified_flush(void) {
5da38d07 2590 unified_cache = CGROUP_UNIFIED_UNKNOWN;
415fc41c 2591
1fcca10e 2592 return cg_unified_update();
efdb0237
LP
2593}
2594
27adcc97
LP
2595int cg_enable_everywhere(
2596 CGroupMask supported,
2597 CGroupMask mask,
2598 const char *p,
2599 CGroupMask *ret_result_mask) {
2600
77fa610b 2601 _cleanup_fclose_ FILE *f = NULL;
efdb0237
LP
2602 _cleanup_free_ char *fs = NULL;
2603 CGroupController c;
27adcc97 2604 CGroupMask ret = 0;
415fc41c 2605 int r;
efdb0237
LP
2606
2607 assert(p);
2608
27adcc97
LP
2609 if (supported == 0) {
2610 if (ret_result_mask)
2611 *ret_result_mask = 0;
efdb0237 2612 return 0;
27adcc97 2613 }
efdb0237 2614
b4cccbc1
LP
2615 r = cg_all_unified();
2616 if (r < 0)
2617 return r;
27adcc97
LP
2618 if (r == 0) {
2619 /* On the legacy hiearchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
2620 * complete success right away. (If you wonder why we return the full mask here, rather than zero: the
2621 * caller tends to use the returned mask later on to compare if all controllers where properly joined,
2622 * and if not requeues realization. This use is the primary purpose of the return value, hence let's
2623 * minimize surprises here and reduce triggers for re-realization by always saying we fully
2624 * succeeded.) */
2625 if (ret_result_mask)
2626 *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
2627 * CGROUP_MASK_V2: The 'supported' mask
2628 * might contain pure-V1 or BPF
2629 * controllers, and we never want to
2630 * claim that we could enable those with
2631 * cgroup.subtree_control */
efdb0237 2632 return 0;
27adcc97 2633 }
efdb0237
LP
2634
2635 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2636 if (r < 0)
2637 return r;
2638
2639 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2640 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2641 const char *n;
2642
ab275f23
LP
2643 if (!FLAGS_SET(CGROUP_MASK_V2, bit))
2644 continue;
2645
f99850a0 2646 if (!FLAGS_SET(supported, bit))
efdb0237
LP
2647 continue;
2648
2649 n = cgroup_controller_to_string(c);
2650 {
2651 char s[1 + strlen(n) + 1];
2652
f99850a0 2653 s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
efdb0237
LP
2654 strcpy(s + 1, n);
2655
77fa610b
LP
2656 if (!f) {
2657 f = fopen(fs, "we");
54b5ba1d
LP
2658 if (!f)
2659 return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
77fa610b
LP
2660 }
2661
604028de 2662 r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
96aa6591 2663 if (r < 0) {
94f344fb
LP
2664 log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
2665 FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
96aa6591 2666 clearerr(f);
27adcc97
LP
2667
2668 /* If we can't turn off a controller, leave it on in the reported resulting mask. This
2669 * happens for example when we attempt to turn off a controller up in the tree that is
2670 * used down in the tree. */
2671 if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
2672 * only here, and not follow the same logic
2673 * for other errors such as EINVAL or
2674 * EOPNOTSUPP or anything else. That's
2675 * because EBUSY indicates that the
2676 * controllers is currently enabled and
2677 * cannot be disabled because something down
2678 * the hierarchy is still using it. Any other
2679 * error most likely means something like "I
2680 * never heard of this controller" or
2681 * similar. In the former case it's hence
2682 * safe to assume the controller is still on
2683 * after the failed operation, while in the
2684 * latter case it's safer to assume the
2685 * controller is unknown and hence certainly
2686 * not enabled. */
2687 ret |= bit;
2688 } else {
2689 /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
2690 if (FLAGS_SET(mask, bit))
2691 ret |= bit;
96aa6591 2692 }
efdb0237
LP
2693 }
2694 }
2695
27adcc97
LP
2696 /* Let's return the precise set of controllers now enabled for the cgroup. */
2697 if (ret_result_mask)
2698 *ret_result_mask = ret;
2699
efdb0237
LP
2700 return 0;
2701}
2702
2703bool cg_is_unified_wanted(void) {
2704 static thread_local int wanted = -1;
415fc41c 2705 int r;
1d84ad94 2706 bool b;
77fab2a9 2707 const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
5f086dc7 2708 _cleanup_free_ char *c = NULL;
efdb0237 2709
77fab2a9 2710 /* If we have a cached value, return that. */
efdb0237
LP
2711 if (wanted >= 0)
2712 return wanted;
2713
239a3d09
ZJS
2714 /* If the hierarchy is already mounted, then follow whatever
2715 * was chosen for it. */
2716 if (cg_unified_flush() >= 0)
b4cccbc1 2717 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
239a3d09 2718
5f086dc7
CD
2719 /* If we were explicitly passed systemd.unified_cgroup_hierarchy,
2720 * respect that. */
1d84ad94 2721 r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
5f086dc7
CD
2722 if (r > 0)
2723 return (wanted = b);
2724
2725 /* If we passed cgroup_no_v1=all with no other instructions, it seems
2726 * highly unlikely that we want to use hybrid or legacy hierarchy. */
2727 r = proc_cmdline_get_key("cgroup_no_v1", 0, &c);
2728 if (r > 0 && streq_ptr(c, "all"))
2729 return (wanted = true);
efdb0237 2730
5f086dc7 2731 return (wanted = is_default);
efdb0237
LP
2732}
2733
2734bool cg_is_legacy_wanted(void) {
239a3d09
ZJS
2735 static thread_local int wanted = -1;
2736
2737 /* If we have a cached value, return that. */
2738 if (wanted >= 0)
2739 return wanted;
2740
4e1dfa45 2741 /* Check if we have cgroup v2 already mounted. */
1b59cf04
ZJS
2742 if (cg_unified_flush() >= 0 &&
2743 unified_cache == CGROUP_UNIFIED_ALL)
239a3d09 2744 return (wanted = false);
1b59cf04
ZJS
2745
2746 /* Otherwise, assume that at least partial legacy is wanted,
4e1dfa45 2747 * since cgroup v2 should already be mounted at this point. */
239a3d09 2748 return (wanted = true);
efdb0237
LP
2749}
2750
a4464b95 2751bool cg_is_hybrid_wanted(void) {
5da38d07 2752 static thread_local int wanted = -1;
415fc41c 2753 int r;
1d84ad94 2754 bool b;
c19739db
ZJS
2755 const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2756 /* We default to true if the default is "hybrid", obviously,
2757 * but also when the default is "unified", because if we get
2758 * called, it means that unified hierarchy was not mounted. */
5da38d07 2759
77fab2a9 2760 /* If we have a cached value, return that. */
5da38d07
TH
2761 if (wanted >= 0)
2762 return wanted;
2763
239a3d09
ZJS
2764 /* If the hierarchy is already mounted, then follow whatever
2765 * was chosen for it. */
2766 if (cg_unified_flush() >= 0 &&
2767 unified_cache == CGROUP_UNIFIED_ALL)
2768 return (wanted = false);
2769
77fab2a9
ZJS
2770 /* Otherwise, let's see what the kernel command line has to say.
2771 * Since checking is expensive, cache a non-error result. */
1d84ad94 2772 r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
5da38d07 2773
2dcb526d
ZJS
2774 /* The meaning of the kernel option is reversed wrt. to the return value
2775 * of this function, hence the negation. */
77fab2a9 2776 return (wanted = r > 0 ? !b : is_default);
5da38d07
TH
2777}
2778
13c31542
TH
2779int cg_weight_parse(const char *s, uint64_t *ret) {
2780 uint64_t u;
2781 int r;
2782
2783 if (isempty(s)) {
2784 *ret = CGROUP_WEIGHT_INVALID;
2785 return 0;
2786 }
2787
2788 r = safe_atou64(s, &u);
2789 if (r < 0)
2790 return r;
2791
2792 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2793 return -ERANGE;
2794
2795 *ret = u;
2796 return 0;
2797}
2798
9be57249
TH
2799const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2800 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2801 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
ac06a0cf
TH
2802 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2803 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
9be57249
TH
2804};
2805
2806static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2807 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2808 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
ac06a0cf
TH
2809 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2810 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
9be57249
TH
2811};
2812
2813DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2814
d53d9474
LP
2815int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2816 uint64_t u;
2817 int r;
2818
2819 if (isempty(s)) {
2820 *ret = CGROUP_CPU_SHARES_INVALID;
2821 return 0;
2822 }
2823
2824 r = safe_atou64(s, &u);
2825 if (r < 0)
2826 return r;
2827
2828 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2829 return -ERANGE;
2830
2831 *ret = u;
2832 return 0;
2833}
2834
2835int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2836 uint64_t u;
2837 int r;
2838
2839 if (isempty(s)) {
2840 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2841 return 0;
2842 }
2843
2844 r = safe_atou64(s, &u);
2845 if (r < 0)
2846 return r;
2847
2848 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2849 return -ERANGE;
2850
2851 *ret = u;
2852 return 0;
2853}
2854
f0bef277
EV
2855bool is_cgroup_fs(const struct statfs *s) {
2856 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2857 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2858}
2859
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        /* Tells whether the file descriptor refers to a cgroup (v1 or v2) file system.
         *
         * The return type is bool, hence an errno value cannot be propagated from here: any non-zero
         * value would be converted to 'true' and wrongly claim that this is a cgroup file system. If the
         * file system cannot be queried, report 'false' instead. */
        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2868
b82f71c7 2869static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
efdb0237
LP
2870 [CGROUP_CONTROLLER_CPU] = "cpu",
2871 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
13c31542 2872 [CGROUP_CONTROLLER_IO] = "io",
efdb0237
LP
2873 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2874 [CGROUP_CONTROLLER_MEMORY] = "memory",
3905f127 2875 [CGROUP_CONTROLLER_DEVICES] = "devices",
03a7b521 2876 [CGROUP_CONTROLLER_PIDS] = "pids",
17f14955 2877 [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
084c7007 2878 [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
efdb0237
LP
2879};
2880
2881DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
f98c2585
CD
2882
2883CGroupMask get_cpu_accounting_mask(void) {
2884 static CGroupMask needed_mask = (CGroupMask) -1;
2885
2886 /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
2887 * provided externally from the CPU controller, which means we don't
2888 * need to enable the CPU controller just to get metrics. This is good,
2889 * because enabling the CPU controller comes at a minor performance
2890 * hit, especially when it's propagated deep into large hierarchies.
2891 * There's also no separate CPU accounting controller available within
2892 * a unified hierarchy.
2893 *
2894 * This combination of factors results in the desired cgroup mask to
2895 * enable for CPU accounting varying as follows:
2896 *
2897 * ╔═════════════════════╤═════════════════════╗
2898 * ║ Linux ≥4.15 │ Linux <4.15 ║
2899 * ╔═══════════════╬═════════════════════╪═════════════════════╣
2900 * ║ Unified ║ nothing │ CGROUP_MASK_CPU ║
2901 * ╟───────────────╫─────────────────────┼─────────────────────╢
2902 * ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
2903 * ╚═══════════════╩═════════════════════╧═════════════════════╝
2904 *
2905 * We check kernel version here instead of manually checking whether
2906 * cpu.stat is present for every cgroup, as that check in itself would
2907 * already be fairly expensive.
2908 *
2909 * Kernels where this patch has been backported will therefore have the
2910 * CPU controller enabled unnecessarily. This is more expensive than
2911 * necessary, but harmless. ☺️
2912 */
2913
2914 if (needed_mask == (CGroupMask) -1) {
2915 if (cg_all_unified()) {
2916 struct utsname u;
2917 assert_se(uname(&u) >= 0);
2918
2919 if (str_verscmp(u.release, "4.15") < 0)
2920 needed_mask = CGROUP_MASK_CPU;
2921 else
2922 needed_mask = 0;
2923 } else
2924 needed_mask = CGROUP_MASK_CPUACCT;
2925 }
2926
2927 return needed_mask;
2928}
2929
2930bool cpu_accounting_is_cheap(void) {
2931 return get_cpu_accounting_mask() == 0;
2932}