]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/cgroup-util.c
cgroup: when discovering which controllers the kernel supports mask with what we...
[thirdparty/systemd.git] / src / basic / cgroup-util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
8c6db833 2
84ac7bea 3#include <dirent.h>
8c6db833 4#include <errno.h>
84ac7bea 5#include <ftw.h>
11c3a366 6#include <limits.h>
8c6db833 7#include <signal.h>
11c3a366 8#include <stddef.h>
35bbbf85 9#include <stdio_ext.h>
8c6db833 10#include <stdlib.h>
84ac7bea 11#include <string.h>
672c48cc 12#include <sys/stat.h>
11c3a366 13#include <sys/statfs.h>
672c48cc 14#include <sys/types.h>
4b58153d 15#include <sys/xattr.h>
84ac7bea 16#include <unistd.h>
8c6db833 17
b5efdb8a 18#include "alloc-util.h"
3ffd4af2 19#include "cgroup-util.h"
93cc7779 20#include "def.h"
a0956174 21#include "dirent-util.h"
84ac7bea 22#include "extract-word.h"
3ffd4af2 23#include "fd-util.h"
84ac7bea 24#include "fileio.h"
f97b34a6 25#include "format-util.h"
f4f15635 26#include "fs-util.h"
93cc7779 27#include "log.h"
84ac7bea
LP
28#include "login-util.h"
29#include "macro.h"
93cc7779 30#include "missing.h"
84ac7bea 31#include "mkdir.h"
6bedfcbb 32#include "parse-util.h"
9eb977db 33#include "path-util.h"
872a590e 34#include "proc-cmdline.h"
84ac7bea
LP
35#include "process-util.h"
36#include "set.h"
9444b1f2 37#include "special.h"
872a590e 38#include "stat-util.h"
d054f0a4 39#include "stdio-util.h"
8b43440b 40#include "string-table.h"
07630cea 41#include "string-util.h"
aae7e17f 42#include "strv.h"
84ac7bea 43#include "unit-name.h"
b1d4f8e1 44#include "user-util.h"
8c6db833 45
c6c18be3 46int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
7027ff61 47 _cleanup_free_ char *fs = NULL;
c6c18be3 48 FILE *f;
7027ff61 49 int r;
c6c18be3 50
c6c18be3
LP
51 assert(_f);
52
c3175a7f
LP
53 r = cg_get_path(controller, path, "cgroup.procs", &fs);
54 if (r < 0)
c6c18be3
LP
55 return r;
56
57 f = fopen(fs, "re");
c6c18be3
LP
58 if (!f)
59 return -errno;
60
61 *_f = f;
62 return 0;
63}
64
c6c18be3
LP
/* Reads the next PID from a stream opened on a cgroup.procs file.
 *
 * Returns 1 and stores the PID in *_pid if one was read, 0 if the end of the file was reached, and a
 * negative errno-style value on failure. -EIO is returned for entries that cannot be parsed, that are
 * zero, or that do not fit into pid_t. *_pid is only written when 1 is returned. */
int cg_read_pid(FILE *f, pid_t *_pid) {
        unsigned long ul;
        pid_t pid;

        /* Note that the cgroup.procs might contain duplicates! See
         * cgroups.txt for details. */

        assert(f);
        assert(_pid);

        errno = 0;
        if (fscanf(f, "%lu", &ul) != 1) {

                if (feof(f))
                        return 0;

                return errno > 0 ? -errno : -EIO;
        }

        /* Refuse anything that is not a valid positive pid_t: zero, and values that would be truncated or
         * turned negative by the conversion. Otherwise a bogus entry could alias an unrelated process. */
        pid = (pid_t) ul;
        if (ul == 0 || pid <= 0 || (unsigned long) pid != ul)
                return -EIO;

        *_pid = pid;
        return 1;
}
89
8b238b13
LP
90int cg_read_event(
91 const char *controller,
92 const char *path,
93 const char *event,
94 char **val) {
95
ab2c3861
TH
96 _cleanup_free_ char *events = NULL, *content = NULL;
97 char *p, *line;
98 int r;
99
100 r = cg_get_path(controller, path, "cgroup.events", &events);
101 if (r < 0)
102 return r;
103
104 r = read_full_file(events, &content, NULL);
105 if (r < 0)
106 return r;
107
108 p = content;
109 while ((line = strsep(&p, "\n"))) {
110 char *key;
111
112 key = strsep(&line, " ");
113 if (!key || !line)
114 return -EINVAL;
115
116 if (strcmp(key, event))
117 continue;
118
119 *val = strdup(line);
120 return 0;
121 }
122
123 return -ENOENT;
124}
125
3228995c
CB
126bool cg_ns_supported(void) {
127 static thread_local int enabled = -1;
128
129 if (enabled >= 0)
130 return enabled;
131
0887fa71
LP
132 if (access("/proc/self/ns/cgroup", F_OK) < 0) {
133 if (errno != ENOENT)
134 log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
135 enabled = false;
136 } else
137 enabled = true;
3228995c
CB
138
139 return enabled;
140}
141
35d2e7ec 142int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
7027ff61 143 _cleanup_free_ char *fs = NULL;
35d2e7ec
LP
144 int r;
145 DIR *d;
146
35d2e7ec
LP
147 assert(_d);
148
149 /* This is not recursive! */
150
c3175a7f
LP
151 r = cg_get_path(controller, path, NULL, &fs);
152 if (r < 0)
35d2e7ec
LP
153 return r;
154
155 d = opendir(fs);
35d2e7ec
LP
156 if (!d)
157 return -errno;
158
159 *_d = d;
160 return 0;
161}
162
163int cg_read_subgroup(DIR *d, char **fn) {
164 struct dirent *de;
165
166 assert(d);
7027ff61 167 assert(fn);
35d2e7ec 168
f01327ad 169 FOREACH_DIRENT_ALL(de, d, return -errno) {
35d2e7ec
LP
170 char *b;
171
172 if (de->d_type != DT_DIR)
173 continue;
174
49bfc877 175 if (dot_or_dot_dot(de->d_name))
35d2e7ec
LP
176 continue;
177
7027ff61
LP
178 b = strdup(de->d_name);
179 if (!b)
35d2e7ec
LP
180 return -ENOMEM;
181
182 *fn = b;
183 return 1;
184 }
185
35d2e7ec
LP
186 return 0;
187}
188
4ad49000 189int cg_rmdir(const char *controller, const char *path) {
7027ff61 190 _cleanup_free_ char *p = NULL;
35d2e7ec
LP
191 int r;
192
ad293f5a
LP
193 r = cg_get_path(controller, path, NULL, &p);
194 if (r < 0)
35d2e7ec
LP
195 return r;
196
197 r = rmdir(p);
7027ff61
LP
198 if (r < 0 && errno != ENOENT)
199 return -errno;
35d2e7ec 200
b4cccbc1
LP
201 r = cg_hybrid_unified();
202 if (r < 0)
203 return r;
204 if (r == 0)
205 return 0;
206
207 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
208 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
209 if (r < 0)
210 log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
211 }
212
7027ff61 213 return 0;
35d2e7ec
LP
214}
215
1d98fef1
LP
216int cg_kill(
217 const char *controller,
218 const char *path,
219 int sig,
220 CGroupFlags flags,
221 Set *s,
222 cg_kill_log_func_t log_kill,
223 void *userdata) {
224
7027ff61 225 _cleanup_set_free_ Set *allocated_set = NULL;
35d2e7ec 226 bool done = false;
8c6db833 227 int r, ret = 0;
35d2e7ec 228 pid_t my_pid;
8c6db833 229
8c6db833
LP
230 assert(sig >= 0);
231
0d5b4810
LP
232 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
233 * SIGCONT on SIGKILL. */
234 if (IN_SET(sig, SIGCONT, SIGKILL))
235 flags &= ~CGROUP_SIGCONT;
236
8c6db833
LP
237 /* This goes through the tasks list and kills them all. This
238 * is repeated until no further processes are added to the
239 * tasks list, to properly handle forking processes */
240
7027ff61 241 if (!s) {
d5099efc 242 s = allocated_set = set_new(NULL);
7027ff61 243 if (!s)
ca949c9d 244 return -ENOMEM;
7027ff61 245 }
8c6db833 246
df0ff127 247 my_pid = getpid_cached();
8c6db833
LP
248
249 do {
7027ff61 250 _cleanup_fclose_ FILE *f = NULL;
0b172489 251 pid_t pid = 0;
8c6db833
LP
252 done = true;
253
7027ff61
LP
254 r = cg_enumerate_processes(controller, path, &f);
255 if (r < 0) {
4c633005 256 if (ret >= 0 && r != -ENOENT)
7027ff61 257 return r;
35d2e7ec 258
7027ff61 259 return ret;
35d2e7ec 260 }
c6c18be3
LP
261
262 while ((r = cg_read_pid(f, &pid)) > 0) {
8c6db833 263
1d98fef1 264 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
c6c18be3 265 continue;
8c6db833 266
fea72cc0 267 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
c6c18be3 268 continue;
8c6db833 269
1d98fef1
LP
270 if (log_kill)
271 log_kill(pid, sig, userdata);
272
8c6db833
LP
273 /* If we haven't killed this process yet, kill
274 * it */
4c633005
LP
275 if (kill(pid, sig) < 0) {
276 if (ret >= 0 && errno != ESRCH)
8c6db833 277 ret = -errno;
6e8314c4 278 } else {
1d98fef1 279 if (flags & CGROUP_SIGCONT)
e155a0aa 280 (void) kill(pid, SIGCONT);
430c18ed 281
6e8314c4
LP
282 if (ret == 0)
283 ret = 1;
430c18ed 284 }
8c6db833 285
8c6db833
LP
286 done = false;
287
fea72cc0 288 r = set_put(s, PID_TO_PTR(pid));
7027ff61 289 if (r < 0) {
35d2e7ec 290 if (ret >= 0)
7027ff61 291 return r;
35d2e7ec 292
7027ff61 293 return ret;
35d2e7ec
LP
294 }
295 }
296
297 if (r < 0) {
298 if (ret >= 0)
7027ff61 299 return r;
35d2e7ec 300
7027ff61 301 return ret;
8c6db833
LP
302 }
303
8c6db833
LP
304 /* To avoid racing against processes which fork
305 * quicker than we can kill them we repeat this until
306 * no new pids need to be killed. */
307
35d2e7ec 308 } while (!done);
8c6db833 309
35d2e7ec 310 return ret;
8c6db833
LP
311}
312
1d98fef1
LP
313int cg_kill_recursive(
314 const char *controller,
315 const char *path,
316 int sig,
317 CGroupFlags flags,
318 Set *s,
319 cg_kill_log_func_t log_kill,
320 void *userdata) {
321
7027ff61
LP
322 _cleanup_set_free_ Set *allocated_set = NULL;
323 _cleanup_closedir_ DIR *d = NULL;
e155a0aa 324 int r, ret;
35d2e7ec 325 char *fn;
8c6db833
LP
326
327 assert(path);
8c6db833
LP
328 assert(sig >= 0);
329
7027ff61 330 if (!s) {
d5099efc 331 s = allocated_set = set_new(NULL);
7027ff61 332 if (!s)
ca949c9d 333 return -ENOMEM;
7027ff61 334 }
ca949c9d 335
1d98fef1 336 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
8c6db833 337
7027ff61
LP
338 r = cg_enumerate_subgroups(controller, path, &d);
339 if (r < 0) {
4c633005 340 if (ret >= 0 && r != -ENOENT)
7027ff61 341 return r;
8c6db833 342
7027ff61 343 return ret;
35d2e7ec 344 }
8c6db833 345
35d2e7ec 346 while ((r = cg_read_subgroup(d, &fn)) > 0) {
7027ff61 347 _cleanup_free_ char *p = NULL;
8c6db833 348
605405c6 349 p = strjoin(path, "/", fn);
35d2e7ec 350 free(fn);
7027ff61
LP
351 if (!p)
352 return -ENOMEM;
8c6db833 353
1d98fef1 354 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
e155a0aa 355 if (r != 0 && ret >= 0)
35d2e7ec 356 ret = r;
8c6db833 357 }
7027ff61 358 if (ret >= 0 && r < 0)
35d2e7ec
LP
359 ret = r;
360
1d98fef1 361 if (flags & CGROUP_REMOVE) {
4ad49000 362 r = cg_rmdir(controller, path);
4c701096 363 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
7027ff61
LP
364 return r;
365 }
ca949c9d 366
8c6db833
LP
367 return ret;
368}
369
1d98fef1
LP
370int cg_migrate(
371 const char *cfrom,
372 const char *pfrom,
373 const char *cto,
374 const char *pto,
375 CGroupFlags flags) {
376
35d2e7ec 377 bool done = false;
246aa6dd 378 _cleanup_set_free_ Set *s = NULL;
8c6db833
LP
379 int r, ret = 0;
380 pid_t my_pid;
381
246aa6dd
LP
382 assert(cfrom);
383 assert(pfrom);
384 assert(cto);
385 assert(pto);
8c6db833 386
d5099efc 387 s = set_new(NULL);
246aa6dd 388 if (!s)
35d2e7ec
LP
389 return -ENOMEM;
390
df0ff127 391 my_pid = getpid_cached();
8c6db833
LP
392
393 do {
7027ff61 394 _cleanup_fclose_ FILE *f = NULL;
0b172489 395 pid_t pid = 0;
8c6db833
LP
396 done = true;
397
b043cd0b 398 r = cg_enumerate_processes(cfrom, pfrom, &f);
246aa6dd 399 if (r < 0) {
4c633005 400 if (ret >= 0 && r != -ENOENT)
7027ff61 401 return r;
35d2e7ec 402
246aa6dd 403 return ret;
35d2e7ec 404 }
c6c18be3
LP
405
406 while ((r = cg_read_pid(f, &pid)) > 0) {
8c6db833 407
35d2e7ec
LP
408 /* This might do weird stuff if we aren't a
409 * single-threaded program. However, we
410 * luckily know we are not */
1d98fef1 411 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
c6c18be3 412 continue;
8c6db833 413
fea72cc0 414 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
35d2e7ec
LP
415 continue;
416
9b84c7f9
LP
417 /* Ignore kernel threads. Since they can only
418 * exist in the root cgroup, we only check for
419 * them there. */
420 if (cfrom &&
57ea45e1 421 empty_or_root(pfrom) &&
9b84c7f9
LP
422 is_kernel_thread(pid) > 0)
423 continue;
424
246aa6dd
LP
425 r = cg_attach(cto, pto, pid);
426 if (r < 0) {
4c633005 427 if (ret >= 0 && r != -ESRCH)
35d2e7ec
LP
428 ret = r;
429 } else if (ret == 0)
430 ret = 1;
8c6db833 431
8c6db833 432 done = false;
35d2e7ec 433
fea72cc0 434 r = set_put(s, PID_TO_PTR(pid));
246aa6dd 435 if (r < 0) {
35d2e7ec 436 if (ret >= 0)
7027ff61 437 return r;
35d2e7ec 438
246aa6dd 439 return ret;
35d2e7ec
LP
440 }
441 }
442
443 if (r < 0) {
444 if (ret >= 0)
7027ff61 445 return r;
35d2e7ec 446
246aa6dd 447 return ret;
8c6db833 448 }
35d2e7ec 449 } while (!done);
8c6db833 450
35d2e7ec 451 return ret;
8c6db833
LP
452}
453
4ad49000
LP
454int cg_migrate_recursive(
455 const char *cfrom,
456 const char *pfrom,
457 const char *cto,
458 const char *pto,
1d98fef1 459 CGroupFlags flags) {
4ad49000 460
246aa6dd 461 _cleanup_closedir_ DIR *d = NULL;
7027ff61 462 int r, ret = 0;
35d2e7ec 463 char *fn;
8c6db833 464
246aa6dd
LP
465 assert(cfrom);
466 assert(pfrom);
467 assert(cto);
468 assert(pto);
8c6db833 469
1d98fef1 470 ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
8c6db833 471
246aa6dd
LP
472 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
473 if (r < 0) {
4c633005 474 if (ret >= 0 && r != -ENOENT)
7027ff61
LP
475 return r;
476
246aa6dd 477 return ret;
35d2e7ec
LP
478 }
479
480 while ((r = cg_read_subgroup(d, &fn)) > 0) {
246aa6dd 481 _cleanup_free_ char *p = NULL;
8c6db833 482
605405c6 483 p = strjoin(pfrom, "/", fn);
35d2e7ec 484 free(fn);
e155a0aa
LP
485 if (!p)
486 return -ENOMEM;
8c6db833 487
1d98fef1 488 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
35d2e7ec
LP
489 if (r != 0 && ret >= 0)
490 ret = r;
8c6db833
LP
491 }
492
35d2e7ec
LP
493 if (r < 0 && ret >= 0)
494 ret = r;
495
1d98fef1 496 if (flags & CGROUP_REMOVE) {
4ad49000 497 r = cg_rmdir(cfrom, pfrom);
4c701096 498 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
246aa6dd
LP
499 return r;
500 }
8c6db833
LP
501
502 return ret;
503}
504
13b84ec7
LP
505int cg_migrate_recursive_fallback(
506 const char *cfrom,
507 const char *pfrom,
508 const char *cto,
509 const char *pto,
1d98fef1 510 CGroupFlags flags) {
13b84ec7
LP
511
512 int r;
513
514 assert(cfrom);
515 assert(pfrom);
516 assert(cto);
517 assert(pto);
518
1d98fef1 519 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
13b84ec7
LP
520 if (r < 0) {
521 char prefix[strlen(pto) + 1];
522
523 /* This didn't work? Then let's try all prefixes of the destination */
524
fecffe5d 525 PATH_FOREACH_PREFIX(prefix, pto) {
e155a0aa
LP
526 int q;
527
1d98fef1 528 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
e155a0aa
LP
529 if (q >= 0)
530 return q;
13b84ec7
LP
531 }
532 }
533
e155a0aa 534 return r;
13b84ec7
LP
535}
536
efdb0237
LP
537static const char *controller_to_dirname(const char *controller) {
538 const char *e;
3474ae3c 539
7027ff61
LP
540 assert(controller);
541
efdb0237
LP
542 /* Converts a controller name to the directory name below
543 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
544 * just cuts off the name= prefixed used for named
545 * hierarchies, if it is specified. */
546
2977724b 547 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
b4cccbc1 548 if (cg_hybrid_unified() > 0)
2977724b
TH
549 controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
550 else
551 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
552 }
b6629c4b 553
efdb0237
LP
554 e = startswith(controller, "name=");
555 if (e)
556 return e;
557
558 return controller;
3474ae3c
LP
559}
560
569b19d8
LP
561static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
562 const char *dn;
018ef268 563 char *t = NULL;
3474ae3c 564
efdb0237 565 assert(fs);
569b19d8
LP
566 assert(controller);
567
568 dn = controller_to_dirname(controller);
efdb0237
LP
569
570 if (isempty(path) && isempty(suffix))
569b19d8 571 t = strappend("/sys/fs/cgroup/", dn);
efdb0237 572 else if (isempty(path))
605405c6 573 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
efdb0237 574 else if (isempty(suffix))
605405c6 575 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
efdb0237 576 else
605405c6 577 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
efdb0237
LP
578 if (!t)
579 return -ENOMEM;
3474ae3c 580
efdb0237
LP
581 *fs = t;
582 return 0;
583}
584
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]" for the unified hierarchy, leaving out empty or NULL
 * components. Returns 0 and the newly allocated string in *fs, or -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        char *joined;
        bool no_path, no_suffix;

        assert(fs);

        no_path = isempty(path);
        no_suffix = isempty(suffix);

        if (no_path && no_suffix)
                joined = strdup("/sys/fs/cgroup");
        else if (no_path)
                joined = strappend("/sys/fs/cgroup/", suffix);
        else if (no_suffix)
                joined = strappend("/sys/fs/cgroup/", path);
        else
                joined = strjoin("/sys/fs/cgroup/", path, "/", suffix);

        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
604
/* Generates the path for a cgroup and, optionally, an attribute ("suffix") inside it.
 *
 * With a controller, the result is a path below /sys/fs/cgroup/, built for the unified or the legacy
 * layout depending on cg_all_unified(). Without a controller, path and suffix are simply joined without
 * any prefix; at least one of the two must be non-NULL then.
 *
 * Returns 0 and the simplified, newly allocated path in *fs, or a negative errno-style value. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int r;

        assert(fs);

        if (controller) {
                if (!cg_controller_is_valid(controller))
                        return -EINVAL;

                r = cg_all_unified();
                if (r < 0)
                        return r;

                r = r > 0 ? join_path_unified(path, suffix, fs)
                          : join_path_legacy(controller, path, suffix, fs);
                if (r < 0)
                        return r;

                path_simplify(*fs, false);
                return 0;
        }

        /* If no controller is specified, we return the path *below* the controllers, without any
         * prefix. */
        char *joined;

        if (!path && !suffix)
                return -EINVAL;

        if (path && suffix)
                joined = strjoin(path, "/", suffix);
        else
                joined = strdup(path ? path : suffix);
        if (!joined)
                return -ENOMEM;

        *fs = path_simplify(joined, false);
        return 0;
}
dbd821ac 648
efdb0237 649static int controller_is_accessible(const char *controller) {
b4cccbc1 650 int r;
37099707 651
efdb0237 652 assert(controller);
37099707 653
efdb0237
LP
654 /* Checks whether a specific controller is accessible,
655 * i.e. its hierarchy mounted. In the unified hierarchy all
656 * controllers are considered accessible, except for the named
657 * hierarchies */
b12afc8c 658
efdb0237
LP
659 if (!cg_controller_is_valid(controller))
660 return -EINVAL;
661
b4cccbc1
LP
662 r = cg_all_unified();
663 if (r < 0)
664 return r;
665 if (r > 0) {
efdb0237
LP
666 /* We don't support named hierarchies if we are using
667 * the unified hierarchy. */
668
669 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
670 return 0;
671
672 if (startswith(controller, "name="))
673 return -EOPNOTSUPP;
674
675 } else {
676 const char *cc, *dn;
677
678 dn = controller_to_dirname(controller);
679 cc = strjoina("/sys/fs/cgroup/", dn);
680
681 if (laccess(cc, F_OK) < 0)
682 return -errno;
683 }
37099707
LP
684
685 return 0;
686}
687
/* Same as cg_get_path(), but first verifies via controller_is_accessible() that the controller can be
 * used, and propagates that check's error if it cannot. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
701
e27796a0 702static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
4ad49000
LP
703 assert(path);
704 assert(sb);
705 assert(ftwbuf);
e27796a0
LP
706
707 if (typeflag != FTW_DP)
708 return 0;
709
710 if (ftwbuf->level < 1)
711 return 0;
712
e155a0aa 713 (void) rmdir(path);
e27796a0
LP
714 return 0;
715}
716
8c6db833 717int cg_trim(const char *controller, const char *path, bool delete_root) {
7027ff61 718 _cleanup_free_ char *fs = NULL;
2977724b 719 int r = 0, q;
8c6db833 720
8c6db833
LP
721 assert(path);
722
e27796a0
LP
723 r = cg_get_path(controller, path, NULL, &fs);
724 if (r < 0)
8c6db833
LP
725 return r;
726
e27796a0 727 errno = 0;
e155a0aa
LP
728 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
729 if (errno == ENOENT)
730 r = 0;
b3267152 731 else if (errno > 0)
e155a0aa
LP
732 r = -errno;
733 else
734 r = -EIO;
735 }
e27796a0
LP
736
737 if (delete_root) {
4ad49000
LP
738 if (rmdir(fs) < 0 && errno != ENOENT)
739 return -errno;
e27796a0
LP
740 }
741
b4cccbc1
LP
742 q = cg_hybrid_unified();
743 if (q < 0)
744 return q;
745 if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
746 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
747 if (q < 0)
748 log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
749 }
750
e27796a0 751 return r;
8c6db833
LP
752}
753
65be7e06
ZJS
754/* Create a cgroup in the hierarchy of controller.
755 * Returns 0 if the group already existed, 1 on success, negative otherwise.
756 */
1434ae6f
LP
757int cg_create(const char *controller, const char *path) {
758 _cleanup_free_ char *fs = NULL;
759 int r;
760
761 r = cg_get_path_and_check(controller, path, NULL, &fs);
762 if (r < 0)
763 return r;
764
765 r = mkdir_parents(fs, 0755);
766 if (r < 0)
767 return r;
768
dae8b82e
ZJS
769 r = mkdir_errno_wrapper(fs, 0755);
770 if (r == -EEXIST)
771 return 0;
772 if (r < 0)
773 return r;
1434ae6f 774
b4cccbc1
LP
775 r = cg_hybrid_unified();
776 if (r < 0)
777 return r;
778
779 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
780 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
781 if (r < 0)
782 log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
783 }
784
1434ae6f
LP
785 return 1;
786}
787
/* Creates the specified cgroup and attaches the process "pid" to it. Returns the result of cg_create()
 * on success, or the first error encountered. Note that the cgroup is not removed again if attaching
 * fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);
        if (attached < 0)
                return attached;

        /* This does not remove the cgroup on failure */
        return created;
}
804
8c6db833 805int cg_attach(const char *controller, const char *path, pid_t pid) {
574d5f2d
LP
806 _cleanup_free_ char *fs = NULL;
807 char c[DECIMAL_STR_MAX(pid_t) + 2];
8c6db833
LP
808 int r;
809
8c6db833
LP
810 assert(path);
811 assert(pid >= 0);
812
b043cd0b 813 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
3474ae3c 814 if (r < 0)
c6c18be3 815 return r;
8c6db833
LP
816
817 if (pid == 0)
df0ff127 818 pid = getpid_cached();
8c6db833 819
d054f0a4 820 xsprintf(c, PID_FMT "\n", pid);
8c6db833 821
604028de 822 r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
2977724b
TH
823 if (r < 0)
824 return r;
825
b4cccbc1
LP
826 r = cg_hybrid_unified();
827 if (r < 0)
828 return r;
829
830 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
831 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
832 if (r < 0)
bd68e99b 833 log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
2977724b
TH
834 }
835
836 return 0;
8c6db833
LP
837}
838
13b84ec7
LP
839int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
840 int r;
841
842 assert(controller);
843 assert(path);
844 assert(pid >= 0);
845
846 r = cg_attach(controller, path, pid);
847 if (r < 0) {
848 char prefix[strlen(path) + 1];
849
850 /* This didn't work? Then let's try all prefixes of
851 * the destination */
852
fecffe5d 853 PATH_FOREACH_PREFIX(prefix, path) {
e155a0aa
LP
854 int q;
855
856 q = cg_attach(controller, prefix, pid);
857 if (q >= 0)
858 return q;
13b84ec7
LP
859 }
860 }
861
e155a0aa 862 return r;
13b84ec7
LP
863}
864
62b9bb26 865int cg_set_access(
2d76d14e
LP
866 const char *controller,
867 const char *path,
2d76d14e
LP
868 uid_t uid,
869 gid_t gid) {
870
62b9bb26
LP
871 struct Attribute {
872 const char *name;
873 bool fatal;
874 };
875
876 /* cgroupsv1, aka legacy/non-unified */
877 static const struct Attribute legacy_attributes[] = {
878 { "cgroup.procs", true },
879 { "tasks", false },
880 { "cgroup.clone_children", false },
881 {},
882 };
883
884 /* cgroupsv2, aka unified */
885 static const struct Attribute unified_attributes[] = {
886 { "cgroup.procs", true },
887 { "cgroup.subtree_control", true },
888 { "cgroup.threads", false },
889 {},
890 };
891
892 static const struct Attribute* const attributes[] = {
893 [false] = legacy_attributes,
894 [true] = unified_attributes,
895 };
974efc46 896
40853aa5 897 _cleanup_free_ char *fs = NULL;
62b9bb26
LP
898 const struct Attribute *i;
899 int r, unified;
8c6db833 900
8c6db833
LP
901 assert(path);
902
62b9bb26 903 if (uid == UID_INVALID && gid == GID_INVALID)
8d53b453
LP
904 return 0;
905
62b9bb26
LP
906 unified = cg_unified_controller(controller);
907 if (unified < 0)
908 return unified;
8c6db833 909
62b9bb26
LP
910 /* Configure access to the cgroup itself */
911 r = cg_get_path(controller, path, NULL, &fs);
974efc46
LP
912 if (r < 0)
913 return r;
8c6db833 914
62b9bb26 915 r = chmod_and_chown(fs, 0755, uid, gid);
b4cccbc1
LP
916 if (r < 0)
917 return r;
40853aa5 918
62b9bb26
LP
919 /* Configure access to the cgroup's attributes */
920 for (i = attributes[unified]; i->name; i++) {
40853aa5 921 fs = mfree(fs);
40853aa5 922
62b9bb26 923 r = cg_get_path(controller, path, i->name, &fs);
40853aa5
LP
924 if (r < 0)
925 return r;
efdb0237 926
62b9bb26
LP
927 r = chmod_and_chown(fs, 0644, uid, gid);
928 if (r < 0) {
929 if (i->fatal)
930 return r;
5beac75e 931
62b9bb26
LP
932 log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
933 }
934 }
935
936 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
937 r = cg_hybrid_unified();
2977724b 938 if (r < 0)
62b9bb26
LP
939 return r;
940 if (r > 0) {
941 /* Always propagate access mode from unified to legacy controller */
942 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
943 if (r < 0)
944 log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
945 }
2977724b 946 }
974efc46 947
efdb0237 948 return 0;
8c6db833
LP
949}
950
4b58153d
LP
951int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
952 _cleanup_free_ char *fs = NULL;
953 int r;
954
955 assert(path);
956 assert(name);
957 assert(value || size <= 0);
958
959 r = cg_get_path(controller, path, NULL, &fs);
960 if (r < 0)
961 return r;
962
963 if (setxattr(fs, name, value, size, flags) < 0)
964 return -errno;
965
966 return 0;
967}
968
969int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
970 _cleanup_free_ char *fs = NULL;
971 ssize_t n;
972 int r;
973
974 assert(path);
975 assert(name);
976
977 r = cg_get_path(controller, path, NULL, &fs);
978 if (r < 0)
979 return r;
980
981 n = getxattr(fs, name, value, size);
982 if (n < 0)
983 return -errno;
984
985 return (int) n;
986}
987
7027ff61 988int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
7027ff61 989 _cleanup_fclose_ FILE *f = NULL;
b6629c4b 990 const char *fs, *controller_str;
d2b39cb6 991 int unified, r;
efdb0237 992 size_t cs = 0;
8c6db833 993
8c6db833 994 assert(path);
c6c18be3 995 assert(pid >= 0);
8c6db833 996
5da38d07
TH
997 if (controller) {
998 if (!cg_controller_is_valid(controller))
999 return -EINVAL;
1000 } else
1001 controller = SYSTEMD_CGROUP_CONTROLLER;
1002
c22800e4 1003 unified = cg_unified_controller(controller);
b4cccbc1
LP
1004 if (unified < 0)
1005 return unified;
1006 if (unified == 0) {
b6629c4b
TH
1007 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
1008 controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
1009 else
1010 controller_str = controller;
1011
1012 cs = strlen(controller_str);
1013 }
7027ff61 1014
b68fa010 1015 fs = procfs_file_alloca(pid, "cgroup");
c6c18be3 1016 f = fopen(fs, "re");
4c633005
LP
1017 if (!f)
1018 return errno == ENOENT ? -ESRCH : -errno;
1019
35bbbf85
LP
1020 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
1021
d2b39cb6
LP
1022 for (;;) {
1023 _cleanup_free_ char *line = NULL;
efdb0237 1024 char *e, *p;
c6c18be3 1025
d2b39cb6
LP
1026 r = read_line(f, LONG_LINE_MAX, &line);
1027 if (r < 0)
1028 return r;
1029 if (r == 0)
1030 break;
c6c18be3 1031
efdb0237
LP
1032 if (unified) {
1033 e = startswith(line, "0:");
1034 if (!e)
1035 continue;
c6c18be3 1036
efdb0237
LP
1037 e = strchr(e, ':');
1038 if (!e)
1039 continue;
1040 } else {
1041 char *l;
1042 size_t k;
1043 const char *word, *state;
1044 bool found = false;
1045
1046 l = strchr(line, ':');
1047 if (!l)
1048 continue;
8af8afd6 1049
efdb0237
LP
1050 l++;
1051 e = strchr(l, ':');
1052 if (!e)
1053 continue;
8af8afd6 1054
efdb0237 1055 *e = 0;
00d4b1e6 1056 FOREACH_WORD_SEPARATOR(word, k, l, ",", state)
b6629c4b 1057 if (k == cs && memcmp(word, controller_str, cs) == 0) {
efdb0237
LP
1058 found = true;
1059 break;
1060 }
efdb0237
LP
1061 if (!found)
1062 continue;
8af8afd6
LP
1063 }
1064
8af8afd6 1065 p = strdup(e + 1);
7027ff61
LP
1066 if (!p)
1067 return -ENOMEM;
c6c18be3 1068
5e20b0a4
LP
1069 /* Truncate suffix indicating the process is a zombie */
1070 e = endswith(p, " (deleted)");
1071 if (e)
1072 *e = 0;
1073
c6c18be3 1074 *path = p;
7027ff61 1075 return 0;
c6c18be3
LP
1076 }
1077
1c80e425 1078 return -ENODATA;
8c6db833
LP
1079}
1080
1081int cg_install_release_agent(const char *controller, const char *agent) {
7027ff61 1082 _cleanup_free_ char *fs = NULL, *contents = NULL;
efdb0237 1083 const char *sc;
415fc41c 1084 int r;
8c6db833 1085
8c6db833
LP
1086 assert(agent);
1087
c22800e4 1088 r = cg_unified_controller(controller);
b4cccbc1
LP
1089 if (r < 0)
1090 return r;
1091 if (r > 0) /* doesn't apply to unified hierarchy */
efdb0237
LP
1092 return -EOPNOTSUPP;
1093
7027ff61
LP
1094 r = cg_get_path(controller, NULL, "release_agent", &fs);
1095 if (r < 0)
c6c18be3 1096 return r;
8c6db833 1097
7027ff61
LP
1098 r = read_one_line_file(fs, &contents);
1099 if (r < 0)
1100 return r;
8c6db833
LP
1101
1102 sc = strstrip(contents);
e155a0aa 1103 if (isempty(sc)) {
604028de 1104 r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
574d5f2d 1105 if (r < 0)
7027ff61 1106 return r;
b8725df8 1107 } else if (!path_equal(sc, agent))
7027ff61 1108 return -EEXIST;
8c6db833 1109
0da16248 1110 fs = mfree(fs);
7027ff61
LP
1111 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1112 if (r < 0)
1113 return r;
8c6db833 1114
0da16248 1115 contents = mfree(contents);
7027ff61
LP
1116 r = read_one_line_file(fs, &contents);
1117 if (r < 0)
1118 return r;
8c6db833
LP
1119
1120 sc = strstrip(contents);
8c6db833 1121 if (streq(sc, "0")) {
604028de 1122 r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
7027ff61
LP
1123 if (r < 0)
1124 return r;
c6c18be3 1125
7027ff61
LP
1126 return 1;
1127 }
8c6db833 1128
7027ff61
LP
1129 if (!streq(sc, "1"))
1130 return -EIO;
8c6db833 1131
7027ff61 1132 return 0;
8c6db833
LP
1133}
1134
ad929bcc
KS
1135int cg_uninstall_release_agent(const char *controller) {
1136 _cleanup_free_ char *fs = NULL;
415fc41c 1137 int r;
efdb0237 1138
c22800e4 1139 r = cg_unified_controller(controller);
b4cccbc1
LP
1140 if (r < 0)
1141 return r;
1142 if (r > 0) /* Doesn't apply to unified hierarchy */
efdb0237 1143 return -EOPNOTSUPP;
ad929bcc 1144
ac9ef333
LP
1145 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1146 if (r < 0)
1147 return r;
1148
604028de 1149 r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
ac9ef333
LP
1150 if (r < 0)
1151 return r;
1152
0da16248 1153 fs = mfree(fs);
ac9ef333 1154
ad929bcc
KS
1155 r = cg_get_path(controller, NULL, "release_agent", &fs);
1156 if (r < 0)
1157 return r;
1158
604028de 1159 r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER);
ad929bcc
KS
1160 if (r < 0)
1161 return r;
1162
ac9ef333 1163 return 0;
ad929bcc
KS
1164}
1165
6f883237 1166int cg_is_empty(const char *controller, const char *path) {
7027ff61 1167 _cleanup_fclose_ FILE *f = NULL;
efdb0237 1168 pid_t pid;
7027ff61 1169 int r;
8c6db833 1170
8c6db833
LP
1171 assert(path);
1172
b043cd0b 1173 r = cg_enumerate_processes(controller, path, &f);
6f883237 1174 if (r == -ENOENT)
1bcf3fc6 1175 return true;
c3175a7f 1176 if (r < 0)
6f883237 1177 return r;
8c6db833 1178
6f883237 1179 r = cg_read_pid(f, &pid);
c6c18be3
LP
1180 if (r < 0)
1181 return r;
8c6db833 1182
6f883237 1183 return r == 0;
8c6db833
LP
1184}
1185
6f883237 1186int cg_is_empty_recursive(const char *controller, const char *path) {
415fc41c 1187 int r;
8c6db833 1188
8c6db833
LP
1189 assert(path);
1190
6fd66507 1191 /* The root cgroup is always populated */
57ea45e1 1192 if (controller && empty_or_root(path))
efdb0237 1193 return false;
6fd66507 1194
c22800e4 1195 r = cg_unified_controller(controller);
b4cccbc1
LP
1196 if (r < 0)
1197 return r;
1198 if (r > 0) {
ab2c3861 1199 _cleanup_free_ char *t = NULL;
8c6db833 1200
efdb0237 1201 /* On the unified hierarchy we can check empty state
ab2c3861 1202 * via the "populated" attribute of "cgroup.events". */
8c6db833 1203
ab2c3861 1204 r = cg_read_event(controller, path, "populated", &t);
1bcf3fc6
ZJS
1205 if (r == -ENOENT)
1206 return true;
efdb0237
LP
1207 if (r < 0)
1208 return r;
1209
1210 return streq(t, "0");
1211 } else {
1212 _cleanup_closedir_ DIR *d = NULL;
1213 char *fn;
8c6db833 1214
efdb0237 1215 r = cg_is_empty(controller, path);
35d2e7ec 1216 if (r <= 0)
7027ff61 1217 return r;
35d2e7ec 1218
efdb0237
LP
1219 r = cg_enumerate_subgroups(controller, path, &d);
1220 if (r == -ENOENT)
1bcf3fc6 1221 return true;
efdb0237
LP
1222 if (r < 0)
1223 return r;
35d2e7ec 1224
efdb0237
LP
1225 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1226 _cleanup_free_ char *p = NULL;
1227
605405c6 1228 p = strjoin(path, "/", fn);
efdb0237
LP
1229 free(fn);
1230 if (!p)
1231 return -ENOMEM;
1232
1233 r = cg_is_empty_recursive(controller, p);
1234 if (r <= 0)
1235 return r;
1236 }
1237 if (r < 0)
1238 return r;
1239
1240 return true;
1241 }
35d2e7ec
LP
1242}
1243
1244int cg_split_spec(const char *spec, char **controller, char **path) {
35d2e7ec 1245 char *t = NULL, *u = NULL;
efdb0237 1246 const char *e;
35d2e7ec
LP
1247
1248 assert(spec);
35d2e7ec
LP
1249
1250 if (*spec == '/') {
99be45a4 1251 if (!path_is_normalized(spec))
e884315e 1252 return -EINVAL;
35d2e7ec
LP
1253
1254 if (path) {
246aa6dd
LP
1255 t = strdup(spec);
1256 if (!t)
35d2e7ec
LP
1257 return -ENOMEM;
1258
858d36c1 1259 *path = path_simplify(t, false);
8c6db833
LP
1260 }
1261
35d2e7ec
LP
1262 if (controller)
1263 *controller = NULL;
1264
1265 return 0;
8c6db833
LP
1266 }
1267
246aa6dd
LP
1268 e = strchr(spec, ':');
1269 if (!e) {
185a0874 1270 if (!cg_controller_is_valid(spec))
35d2e7ec
LP
1271 return -EINVAL;
1272
1273 if (controller) {
efdb0237 1274 t = strdup(spec);
246aa6dd 1275 if (!t)
35d2e7ec
LP
1276 return -ENOMEM;
1277
1278 *controller = t;
1279 }
1280
1281 if (path)
1282 *path = NULL;
1283
1284 return 0;
8c6db833
LP
1285 }
1286
efdb0237 1287 t = strndup(spec, e-spec);
e884315e
LP
1288 if (!t)
1289 return -ENOMEM;
185a0874 1290 if (!cg_controller_is_valid(t)) {
e884315e 1291 free(t);
35d2e7ec 1292 return -EINVAL;
246aa6dd
LP
1293 }
1294
efdb0237
LP
1295 if (isempty(e+1))
1296 u = NULL;
1297 else {
baa89da4
LP
1298 u = strdup(e+1);
1299 if (!u) {
1300 free(t);
1301 return -ENOMEM;
1302 }
35d2e7ec 1303
99be45a4 1304 if (!path_is_normalized(u) ||
baa89da4
LP
1305 !path_is_absolute(u)) {
1306 free(t);
1307 free(u);
1308 return -EINVAL;
1309 }
1310
858d36c1 1311 path_simplify(u, false);
baa89da4 1312 }
5954c074 1313
35d2e7ec
LP
1314 if (controller)
1315 *controller = t;
e884315e
LP
1316 else
1317 free(t);
35d2e7ec
LP
1318
1319 if (path)
1320 *path = u;
e884315e
LP
1321 else
1322 free(u);
35d2e7ec
LP
1323
1324 return 0;
8c6db833 1325}
c6c18be3 1326
7027ff61 1327int cg_mangle_path(const char *path, char **result) {
78edb35a
LP
1328 _cleanup_free_ char *c = NULL, *p = NULL;
1329 char *t;
35d2e7ec
LP
1330 int r;
1331
1332 assert(path);
1333 assert(result);
1334
73e231ab 1335 /* First, check if it already is a filesystem path */
7027ff61 1336 if (path_startswith(path, "/sys/fs/cgroup")) {
35d2e7ec 1337
b69d29ce
LP
1338 t = strdup(path);
1339 if (!t)
35d2e7ec
LP
1340 return -ENOMEM;
1341
858d36c1 1342 *result = path_simplify(t, false);
35d2e7ec
LP
1343 return 0;
1344 }
1345
73e231ab 1346 /* Otherwise, treat it as cg spec */
b69d29ce
LP
1347 r = cg_split_spec(path, &c, &p);
1348 if (r < 0)
35d2e7ec
LP
1349 return r;
1350
efdb0237 1351 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
35d2e7ec 1352}
1f73f0f1 1353
7027ff61 1354int cg_get_root_path(char **path) {
9444b1f2 1355 char *p, *e;
7027ff61
LP
1356 int r;
1357
1358 assert(path);
1359
9444b1f2 1360 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
7027ff61
LP
1361 if (r < 0)
1362 return r;
1363
efdb0237
LP
1364 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1365 if (!e)
1366 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1367 if (!e)
1368 e = endswith(p, "/system"); /* even more legacy */
9444b1f2 1369 if (e)
7027ff61
LP
1370 *e = 0;
1371
1f73f0f1
LP
1372 *path = p;
1373 return 0;
1374}
b59e2465 1375
751bc6ac
LP
1376int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1377 _cleanup_free_ char *rt = NULL;
1378 char *p;
ba1261bc
LP
1379 int r;
1380
e9174f29 1381 assert(cgroup);
751bc6ac 1382 assert(shifted);
e9174f29
LP
1383
1384 if (!root) {
1385 /* If the root was specified let's use that, otherwise
1386 * let's determine it from PID 1 */
1387
751bc6ac 1388 r = cg_get_root_path(&rt);
e9174f29
LP
1389 if (r < 0)
1390 return r;
1391
751bc6ac 1392 root = rt;
e9174f29 1393 }
ba1261bc 1394
751bc6ac 1395 p = path_startswith(cgroup, root);
efdb0237 1396 if (p && p > cgroup)
751bc6ac
LP
1397 *shifted = p - 1;
1398 else
1399 *shifted = cgroup;
1400
1401 return 0;
1402}
1403
1404int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1405 _cleanup_free_ char *raw = NULL;
1406 const char *c;
1407 int r;
1408
1409 assert(pid >= 0);
1410 assert(cgroup);
1411
1412 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
7027ff61 1413 if (r < 0)
ba1261bc 1414 return r;
ba1261bc 1415
751bc6ac
LP
1416 r = cg_shift_path(raw, root, &c);
1417 if (r < 0)
1418 return r;
ba1261bc 1419
ae2a15bc
LP
1420 if (c == raw)
1421 *cgroup = TAKE_PTR(raw);
1422 else {
751bc6ac 1423 char *n;
ba1261bc 1424
751bc6ac
LP
1425 n = strdup(c);
1426 if (!n)
ba1261bc 1427 return -ENOMEM;
ba1261bc 1428
751bc6ac
LP
1429 *cgroup = n;
1430 }
ba1261bc
LP
1431
1432 return 0;
1433}
1434
9ed794a3 1435int cg_path_decode_unit(const char *cgroup, char **unit) {
8b0849e9
LP
1436 char *c, *s;
1437 size_t n;
ef1673d1
MT
1438
1439 assert(cgroup);
6c03089c 1440 assert(unit);
ef1673d1 1441
8b0849e9
LP
1442 n = strcspn(cgroup, "/");
1443 if (n < 3)
1444 return -ENXIO;
1445
1446 c = strndupa(cgroup, n);
ae018d9b 1447 c = cg_unescape(c);
ef1673d1 1448
7410616c 1449 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
cfeaa44a 1450 return -ENXIO;
ef1673d1 1451
d7bd3de0 1452 s = strdup(c);
6c03089c
LP
1453 if (!s)
1454 return -ENOMEM;
1455
1456 *unit = s;
ef1673d1
MT
1457 return 0;
1458}
1459
8b0849e9
LP
1460static bool valid_slice_name(const char *p, size_t n) {
1461
1462 if (!p)
1463 return false;
1464
fbd0b64f 1465 if (n < STRLEN("x.slice"))
8b0849e9
LP
1466 return false;
1467
1468 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1469 char buf[n+1], *c;
1470
1471 memcpy(buf, p, n);
1472 buf[n] = 0;
1473
1474 c = cg_unescape(buf);
1475
7410616c 1476 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
8b0849e9
LP
1477 }
1478
1479 return false;
1480}
1481
/* Skips over all leading slice components of 'p' and returns a pointer to the first path component
 * that is not a valid slice name (possibly the terminating NUL). */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t len;

                /* Drop separators, then measure the next component */
                component = p + strspn(p, "/");
                len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                p = component + len;
        }
}
1499
8b0849e9 1500int cg_path_get_unit(const char *path, char **ret) {
6c03089c 1501 const char *e;
8b0849e9
LP
1502 char *unit;
1503 int r;
6c03089c
LP
1504
1505 assert(path);
8b0849e9 1506 assert(ret);
6c03089c 1507
9444b1f2 1508 e = skip_slices(path);
6c03089c 1509
8b0849e9
LP
1510 r = cg_path_decode_unit(e, &unit);
1511 if (r < 0)
1512 return r;
1513
1514 /* We skipped over the slices, don't accept any now */
1515 if (endswith(unit, ".slice")) {
1516 free(unit);
1517 return -ENXIO;
1518 }
1519
1520 *ret = unit;
1521 return 0;
6c03089c
LP
1522}
1523
1524int cg_pid_get_unit(pid_t pid, char **unit) {
7fd1b19b 1525 _cleanup_free_ char *cgroup = NULL;
ba1261bc 1526 int r;
ba1261bc 1527
ef1673d1
MT
1528 assert(unit);
1529
7027ff61 1530 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
ef1673d1
MT
1531 if (r < 0)
1532 return r;
1533
6c03089c
LP
1534 return cg_path_get_unit(cgroup, unit);
1535}
ef1673d1 1536
d4fffc4b
ZJS
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to what follows the "session-<id>.scope" component (with any separators after
 * it skipped too), or NULL if 'p' is empty, its first component does not have that form, or
 * session_id_valid() rejects the ID.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the ID between the 8 byte prefix and the 6 byte suffix, plus NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure with NULL like all
                 * other failure paths do, not with the boolean 'false'. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1573
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the "user@<uid>.service" component (separators after it are
 * skipped as well), or NULL if 'p' is empty, the first component has a different form, or
 * parse_uid() does not accept the part between prefix and suffix.
 */
static const char *skip_user_manager(const char *p) {
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < STRLEN("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) == 0 && memcmp(p + len - 8, ".service", 8) == 0) {
                size_t uid_len = len - 5 - 8;
                char uid_str[uid_len + 1];

                memcpy(uid_str, p + 5, uid_len);
                uid_str[uid_len] = 0;

                /* User manager services never need unescaping: they cannot clash with names
                 * the kernel uses itself, hence cg_unescape() is not called here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;

                p += len;
                return p + strspn(p, "/");
        }

        return NULL;
}
1611
/* Skips everything in 'path' that leads up to the units of a user: any slices, followed by either
 * the user manager service or a session scope. Returns NULL if neither of the latter is there. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Prefer the user manager if it is next in the path, otherwise try the session scope */
        after_manager = skip_user_manager(after_slices);

        return after_manager ?: skip_session(after_slices);
}
32081481 1628
329ac4bc
LP
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO if the path carries no user
 * prefix as understood by skip_user_prefix(), otherwise whatever cg_path_get_unit() returns for
 * the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the prefix things look just like for a system unit, hence use the same parser */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
ba1261bc 1644
ef1673d1 1645int cg_pid_get_user_unit(pid_t pid, char **unit) {
7fd1b19b 1646 _cleanup_free_ char *cgroup = NULL;
6c03089c
LP
1647 int r;
1648
1649 assert(unit);
1650
7027ff61 1651 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
6c03089c
LP
1652 if (r < 0)
1653 return r;
1654
1655 return cg_path_get_user_unit(cgroup, unit);
ba1261bc 1656}
e884315e 1657
7027ff61 1658int cg_path_get_machine_name(const char *path, char **machine) {
efdb0237
LP
1659 _cleanup_free_ char *u = NULL;
1660 const char *sl;
89f7c846 1661 int r;
374ec6ab 1662
89f7c846
LP
1663 r = cg_path_get_unit(path, &u);
1664 if (r < 0)
1665 return r;
7027ff61 1666
efdb0237 1667 sl = strjoina("/run/systemd/machines/unit:", u);
89f7c846 1668 return readlink_malloc(sl, machine);
7027ff61
LP
1669}
1670
1671int cg_pid_get_machine_name(pid_t pid, char **machine) {
7fd1b19b 1672 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1673 int r;
1674
1675 assert(machine);
1676
1677 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1678 if (r < 0)
1679 return r;
1680
1681 return cg_path_get_machine_name(cgroup, machine);
1682}
1683
1684int cg_path_get_session(const char *path, char **session) {
8b0849e9
LP
1685 _cleanup_free_ char *unit = NULL;
1686 char *start, *end;
1687 int r;
7027ff61
LP
1688
1689 assert(path);
7027ff61 1690
8b0849e9
LP
1691 r = cg_path_get_unit(path, &unit);
1692 if (r < 0)
1693 return r;
7027ff61 1694
8b0849e9
LP
1695 start = startswith(unit, "session-");
1696 if (!start)
cfeaa44a 1697 return -ENXIO;
8b0849e9
LP
1698 end = endswith(start, ".scope");
1699 if (!end)
cfeaa44a 1700 return -ENXIO;
8b0849e9
LP
1701
1702 *end = 0;
1703 if (!session_id_valid(start))
cfeaa44a 1704 return -ENXIO;
374ec6ab 1705
af08d2f9 1706 if (session) {
8b0849e9 1707 char *rr;
af08d2f9 1708
8b0849e9
LP
1709 rr = strdup(start);
1710 if (!rr)
af08d2f9
LP
1711 return -ENOMEM;
1712
8b0849e9 1713 *session = rr;
af08d2f9 1714 }
7027ff61 1715
7027ff61
LP
1716 return 0;
1717}
1718
1719int cg_pid_get_session(pid_t pid, char **session) {
7fd1b19b 1720 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1721 int r;
1722
7027ff61
LP
1723 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1724 if (r < 0)
1725 return r;
1726
1727 return cg_path_get_session(cgroup, session);
1728}
1729
ae018d9b 1730int cg_path_get_owner_uid(const char *path, uid_t *uid) {
374ec6ab 1731 _cleanup_free_ char *slice = NULL;
8b0849e9 1732 char *start, *end;
374ec6ab 1733 int r;
ae018d9b
LP
1734
1735 assert(path);
ae018d9b 1736
374ec6ab
LP
1737 r = cg_path_get_slice(path, &slice);
1738 if (r < 0)
1739 return r;
ae018d9b 1740
674eb685
LP
1741 start = startswith(slice, "user-");
1742 if (!start)
cfeaa44a 1743 return -ENXIO;
8b0849e9 1744 end = endswith(start, ".slice");
674eb685 1745 if (!end)
cfeaa44a 1746 return -ENXIO;
ae018d9b 1747
8b0849e9
LP
1748 *end = 0;
1749 if (parse_uid(start, uid) < 0)
cfeaa44a 1750 return -ENXIO;
674eb685 1751
674eb685 1752 return 0;
ae018d9b
LP
1753}
1754
1755int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1756 _cleanup_free_ char *cgroup = NULL;
1757 int r;
1758
ae018d9b
LP
1759 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1760 if (r < 0)
1761 return r;
1762
1763 return cg_path_get_owner_uid(cgroup, uid);
1764}
1765
1021b21b
LP
1766int cg_path_get_slice(const char *p, char **slice) {
1767 const char *e = NULL;
1021b21b
LP
1768
1769 assert(p);
1770 assert(slice);
1771
329ac4bc
LP
1772 /* Finds the right-most slice unit from the beginning, but
1773 * stops before we come to the first non-slice unit. */
1774
1021b21b
LP
1775 for (;;) {
1776 size_t n;
1777
1778 p += strspn(p, "/");
1779
1780 n = strcspn(p, "/");
8b0849e9 1781 if (!valid_slice_name(p, n)) {
1021b21b 1782
8b0849e9
LP
1783 if (!e) {
1784 char *s;
1021b21b 1785
e5d855d3 1786 s = strdup(SPECIAL_ROOT_SLICE);
8b0849e9
LP
1787 if (!s)
1788 return -ENOMEM;
1021b21b 1789
8b0849e9
LP
1790 *slice = s;
1791 return 0;
1792 }
1793
1794 return cg_path_decode_unit(e, slice);
1021b21b
LP
1795 }
1796
1797 e = p;
1021b21b
LP
1798 p += n;
1799 }
1800}
1801
1802int cg_pid_get_slice(pid_t pid, char **slice) {
1803 _cleanup_free_ char *cgroup = NULL;
1804 int r;
1805
1806 assert(slice);
1807
1808 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1809 if (r < 0)
1810 return r;
1811
1812 return cg_path_get_slice(cgroup, slice);
1813}
1814
329ac4bc
LP
/* Determines the slice below the user prefix of a cgroup path. Returns -ENXIO if the path carries
 * no user prefix as understood by skip_user_prefix(), otherwise whatever cg_path_get_slice()
 * returns for the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Past the prefix things look just like for a system slice, hence use the same parser */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1828
1829int cg_pid_get_user_slice(pid_t pid, char **slice) {
1830 _cleanup_free_ char *cgroup = NULL;
1831 int r;
1832
1833 assert(slice);
1834
1835 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1836 if (r < 0)
1837 return r;
1838
1839 return cg_path_get_user_slice(cgroup, slice);
1840}
1841
ae018d9b
LP
1842char *cg_escape(const char *p) {
1843 bool need_prefix = false;
1844
1845 /* This implements very minimal escaping for names to be used
1846 * as file names in the cgroup tree: any name which might
1847 * conflict with a kernel name or is prefixed with '_' is
1848 * prefixed with a '_'. That way, when reading cgroup names it
1849 * is sufficient to remove a single prefixing underscore if
1850 * there is one. */
1851
1852 /* The return value of this function (unlike cg_unescape())
1853 * needs free()! */
1854
4c701096 1855 if (IN_SET(p[0], 0, '_', '.') ||
a0ab5665
LP
1856 streq(p, "notify_on_release") ||
1857 streq(p, "release_agent") ||
efdb0237
LP
1858 streq(p, "tasks") ||
1859 startswith(p, "cgroup."))
ae018d9b
LP
1860 need_prefix = true;
1861 else {
1862 const char *dot;
1863
1864 dot = strrchr(p, '.');
1865 if (dot) {
efdb0237
LP
1866 CGroupController c;
1867 size_t l = dot - p;
ae018d9b 1868
efdb0237
LP
1869 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1870 const char *n;
1871
1872 n = cgroup_controller_to_string(c);
ae018d9b 1873
efdb0237
LP
1874 if (l != strlen(n))
1875 continue;
ae018d9b 1876
efdb0237
LP
1877 if (memcmp(p, n, l) != 0)
1878 continue;
1879
1880 need_prefix = true;
1881 break;
ae018d9b
LP
1882 }
1883 }
1884 }
1885
1886 if (need_prefix)
1887 return strappend("_", p);
efdb0237
LP
1888
1889 return strdup(p);
ae018d9b
LP
1890}
1891
/* Reverses cg_escape(): skips a single leading underscore, if there is one.
 *
 * The return value of this function (unlike cg_escape()) doesn't need free()! It points into the
 * string passed in. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
78edb35a
LP
1903
1904#define CONTROLLER_VALID \
4b549144 1905 DIGITS LETTERS \
78edb35a
LP
1906 "_"
1907
185a0874 1908bool cg_controller_is_valid(const char *p) {
78edb35a
LP
1909 const char *t, *s;
1910
1911 if (!p)
1912 return false;
1913
b6629c4b
TH
1914 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1915 return true;
1916
185a0874
DJL
1917 s = startswith(p, "name=");
1918 if (s)
1919 p = s;
78edb35a 1920
4c701096 1921 if (IN_SET(*p, 0, '_'))
78edb35a
LP
1922 return false;
1923
1924 for (t = p; *t; t++)
1925 if (!strchr(CONTROLLER_VALID, *t))
1926 return false;
1927
1928 if (t - p > FILENAME_MAX)
1929 return false;
1930
1931 return true;
1932}
a016b922
LP
1933
1934int cg_slice_to_path(const char *unit, char **ret) {
1935 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1936 const char *dash;
7410616c 1937 int r;
a016b922
LP
1938
1939 assert(unit);
1940 assert(ret);
1941
e5d855d3 1942 if (streq(unit, SPECIAL_ROOT_SLICE)) {
c96cc582
LP
1943 char *x;
1944
1945 x = strdup("");
1946 if (!x)
1947 return -ENOMEM;
1948 *ret = x;
1949 return 0;
1950 }
1951
7410616c 1952 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
a016b922
LP
1953 return -EINVAL;
1954
1955 if (!endswith(unit, ".slice"))
1956 return -EINVAL;
1957
7410616c
LP
1958 r = unit_name_to_prefix(unit, &p);
1959 if (r < 0)
1960 return r;
a016b922
LP
1961
1962 dash = strchr(p, '-');
e66e5b61
LP
1963
1964 /* Don't allow initial dashes */
1965 if (dash == p)
1966 return -EINVAL;
1967
a016b922
LP
1968 while (dash) {
1969 _cleanup_free_ char *escaped = NULL;
1970 char n[dash - p + sizeof(".slice")];
1971
989290db 1972#if HAS_FEATURE_MEMORY_SANITIZER
1c56d501
ZJS
1973 /* msan doesn't instrument stpncpy, so it thinks
1974 * n is later used unitialized:
1975 * https://github.com/google/sanitizers/issues/926
1976 */
1977 zero(n);
1978#endif
1979
e66e5b61 1980 /* Don't allow trailing or double dashes */
4c701096 1981 if (IN_SET(dash[1], 0, '-'))
c96cc582 1982 return -EINVAL;
a016b922 1983
c96cc582 1984 strcpy(stpncpy(n, p, dash - p), ".slice");
7410616c 1985 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
a016b922
LP
1986 return -EINVAL;
1987
1988 escaped = cg_escape(n);
1989 if (!escaped)
1990 return -ENOMEM;
1991
1992 if (!strextend(&s, escaped, "/", NULL))
1993 return -ENOMEM;
1994
1995 dash = strchr(dash+1, '-');
1996 }
1997
1998 e = cg_escape(unit);
1999 if (!e)
2000 return -ENOMEM;
2001
2002 if (!strextend(&s, e, NULL))
2003 return -ENOMEM;
2004
ae2a15bc 2005 *ret = TAKE_PTR(s);
a016b922
LP
2006
2007 return 0;
2008}
4ad49000
LP
2009
2010int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2011 _cleanup_free_ char *p = NULL;
2012 int r;
2013
2014 r = cg_get_path(controller, path, attribute, &p);
2015 if (r < 0)
2016 return r;
2017
604028de 2018 return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
4ad49000
LP
2019}
2020
934277fe
LP
2021int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2022 _cleanup_free_ char *p = NULL;
2023 int r;
2024
2025 r = cg_get_path(controller, path, attribute, &p);
2026 if (r < 0)
2027 return r;
2028
2029 return read_one_line_file(p, ret);
2030}
2031
b734a4ff
LP
2032int cg_get_keyed_attribute(
2033 const char *controller,
2034 const char *path,
2035 const char *attribute,
2036 char **keys,
2037 char **ret_values) {
66ebf6c0 2038
b734a4ff 2039 _cleanup_free_ char *filename = NULL, *contents = NULL;
b734a4ff 2040 const char *p;
9177fa9f 2041 size_t n, i, n_done = 0;
b734a4ff
LP
2042 char **v;
2043 int r;
2044
2045 /* Reads one or more fields of a cgroupsv2 keyed attribute file. The 'keys' parameter should be an strv with
2046 * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
2047 * entries as 'keys'. On success each entry will be set to the value of the matching key.
2048 *
2049 * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
66ebf6c0
TH
2050
2051 r = cg_get_path(controller, path, attribute, &filename);
2052 if (r < 0)
2053 return r;
2054
b734a4ff 2055 r = read_full_file(filename, &contents, NULL);
66ebf6c0
TH
2056 if (r < 0)
2057 return r;
2058
b734a4ff
LP
2059 n = strv_length(keys);
2060 if (n == 0) /* No keys to retrieve? That's easy, we are done then */
2061 return 0;
66ebf6c0 2062
b734a4ff
LP
2063 /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
2064 v = newa0(char*, n);
66ebf6c0 2065
b734a4ff
LP
2066 for (p = contents; *p;) {
2067 const char *w = NULL;
b734a4ff 2068
9177fa9f
ZJS
2069 for (i = 0; i < n; i++)
2070 if (!v[i]) {
b734a4ff
LP
2071 w = first_word(p, keys[i]);
2072 if (w)
2073 break;
66ebf6c0 2074 }
66ebf6c0 2075
b734a4ff 2076 if (w) {
b734a4ff
LP
2077 size_t l;
2078
2079 l = strcspn(w, NEWLINE);
9177fa9f
ZJS
2080 v[i] = strndup(w, l);
2081 if (!v[i]) {
b734a4ff
LP
2082 r = -ENOMEM;
2083 goto fail;
66ebf6c0 2084 }
b734a4ff 2085
b734a4ff 2086 n_done++;
b734a4ff
LP
2087 if (n_done >= n)
2088 goto done;
2089
2090 p = w + l;
9177fa9f 2091 } else
b734a4ff 2092 p += strcspn(p, NEWLINE);
b734a4ff
LP
2093
2094 p += strspn(p, NEWLINE);
66ebf6c0
TH
2095 }
2096
b734a4ff
LP
2097 r = -ENXIO;
2098
2099fail:
2100 for (i = 0; i < n; i++)
2101 free(v[i]);
2102
2103 return r;
2104
2105done:
2106 memcpy(ret_values, v, sizeof(char*) * n);
66ebf6c0 2107 return 0;
b734a4ff 2108
66ebf6c0
TH
2109}
2110
efdb0237
LP
2111int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2112 CGroupController c;
65be7e06 2113 bool created;
415fc41c 2114 int r;
4ad49000
LP
2115
2116 /* This one will create a cgroup in our private tree, but also
2117 * duplicate it in the trees specified in mask, and remove it
65be7e06
ZJS
2118 * in all others.
2119 *
2120 * Returns 0 if the group already existed in the systemd hierarchy,
2121 * 1 on success, negative otherwise.
2122 */
4ad49000
LP
2123
2124 /* First create the cgroup in our own hierarchy. */
2125 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2126 if (r < 0)
2127 return r;
490c5a37 2128 created = r;
4ad49000 2129
efdb0237 2130 /* If we are in the unified hierarchy, we are done now */
b4cccbc1
LP
2131 r = cg_all_unified();
2132 if (r < 0)
2133 return r;
2134 if (r > 0)
65be7e06 2135 return created;
efdb0237
LP
2136
2137 /* Otherwise, do the same in the other hierarchies */
2138 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2139 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2140 const char *n;
2141
ab275f23
LP
2142 if (!FLAGS_SET(CGROUP_MASK_V1, bit))
2143 continue;
2144
efdb0237
LP
2145 n = cgroup_controller_to_string(c);
2146
13b84ec7 2147 if (mask & bit)
efdb0237 2148 (void) cg_create(n, path);
13b84ec7 2149 else if (supported & bit)
efdb0237 2150 (void) cg_trim(n, path, true);
4ad49000
LP
2151 }
2152
65be7e06 2153 return created;
4ad49000
LP
2154}
2155
efdb0237
LP
2156int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2157 CGroupController c;
415fc41c 2158 int r;
4ad49000
LP
2159
2160 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
13b84ec7
LP
2161 if (r < 0)
2162 return r;
4ad49000 2163
b4cccbc1
LP
2164 r = cg_all_unified();
2165 if (r < 0)
2166 return r;
2167 if (r > 0)
efdb0237 2168 return 0;
7b3fd631 2169
efdb0237
LP
2170 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2171 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2172 const char *p = NULL;
7b3fd631 2173
ab275f23
LP
2174 if (!FLAGS_SET(CGROUP_MASK_V1, bit))
2175 continue;
2176
efdb0237
LP
2177 if (!(supported & bit))
2178 continue;
7b3fd631 2179
efdb0237
LP
2180 if (path_callback)
2181 p = path_callback(bit, userdata);
7b3fd631 2182
efdb0237
LP
2183 if (!p)
2184 p = path;
4ad49000 2185
efdb0237 2186 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
4ad49000
LP
2187 }
2188
13b84ec7 2189 return 0;
4ad49000
LP
2190}
2191
efdb0237 2192int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
6c12b52e
LP
2193 Iterator i;
2194 void *pidp;
2195 int r = 0;
2196
2197 SET_FOREACH(pidp, pids, i) {
fea72cc0 2198 pid_t pid = PTR_TO_PID(pidp);
13b84ec7 2199 int q;
6c12b52e 2200
7b3fd631 2201 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
efdb0237 2202 if (q < 0 && r >= 0)
13b84ec7 2203 r = q;
6c12b52e
LP
2204 }
2205
2206 return r;
2207}
2208
efdb0237 2209int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
b3c5bad3 2210 CGroupController c;
b4cccbc1 2211 int r = 0, q;
4ad49000 2212
13b84ec7 2213 if (!path_equal(from, to)) {
1d98fef1 2214 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
13b84ec7
LP
2215 if (r < 0)
2216 return r;
2217 }
4ad49000 2218
b4cccbc1
LP
2219 q = cg_all_unified();
2220 if (q < 0)
2221 return q;
2222 if (q > 0)
efdb0237 2223 return r;
03b90d4b 2224
efdb0237
LP
2225 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2226 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2227 const char *p = NULL;
03b90d4b 2228
ab275f23
LP
2229 if (!FLAGS_SET(CGROUP_MASK_V1, bit))
2230 continue;
2231
efdb0237
LP
2232 if (!(supported & bit))
2233 continue;
03b90d4b 2234
efdb0237
LP
2235 if (to_callback)
2236 p = to_callback(bit, userdata);
4ad49000 2237
efdb0237
LP
2238 if (!p)
2239 p = to;
2240
1d98fef1 2241 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
4ad49000
LP
2242 }
2243
13b84ec7 2244 return 0;
4ad49000
LP
2245}
2246
efdb0237
LP
2247int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2248 CGroupController c;
b4cccbc1 2249 int r, q;
4ad49000
LP
2250
2251 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2252 if (r < 0)
2253 return r;
2254
b4cccbc1
LP
2255 q = cg_all_unified();
2256 if (q < 0)
2257 return q;
2258 if (q > 0)
efdb0237
LP
2259 return r;
2260
2261 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2262 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2263
ab275f23
LP
2264 if (!FLAGS_SET(CGROUP_MASK_V1, bit))
2265 continue;
2266
efdb0237
LP
2267 if (!(supported & bit))
2268 continue;
4ad49000 2269
efdb0237 2270 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
4ad49000
LP
2271 }
2272
13b84ec7 2273 return 0;
4ad49000
LP
2274}
2275
aae7e17f 2276int cg_mask_to_string(CGroupMask mask, char **ret) {
ec635a2d
LP
2277 _cleanup_free_ char *s = NULL;
2278 size_t n = 0, allocated = 0;
2279 bool space = false;
aae7e17f 2280 CGroupController c;
aae7e17f
FB
2281
2282 assert(ret);
2283
2284 if (mask == 0) {
2285 *ret = NULL;
2286 return 0;
2287 }
2288
2289 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
ec635a2d
LP
2290 const char *k;
2291 size_t l;
aae7e17f
FB
2292
2293 if (!(mask & CGROUP_CONTROLLER_TO_MASK(c)))
2294 continue;
2295
ec635a2d
LP
2296 k = cgroup_controller_to_string(c);
2297 l = strlen(k);
2298
2299 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
2300 return -ENOMEM;
2301
2302 if (space)
2303 s[n] = ' ';
2304 memcpy(s + n + space, k, l);
2305 n += space + l;
2306
2307 space = true;
aae7e17f
FB
2308 }
2309
ec635a2d 2310 assert(s);
aae7e17f 2311
ec635a2d 2312 s[n] = 0;
ae2a15bc 2313 *ret = TAKE_PTR(s);
ec635a2d 2314
aae7e17f
FB
2315 return 0;
2316}
2317
38a90d45
LP
2318int cg_mask_from_string(const char *value, CGroupMask *ret) {
2319 CGroupMask m = 0;
2320
2321 assert(ret);
aae7e17f
FB
2322 assert(value);
2323
2324 for (;;) {
2325 _cleanup_free_ char *n = NULL;
2326 CGroupController v;
2327 int r;
2328
2329 r = extract_first_word(&value, &n, NULL, 0);
2330 if (r < 0)
2331 return r;
2332 if (r == 0)
2333 break;
2334
2335 v = cgroup_controller_from_string(n);
2336 if (v < 0)
2337 continue;
2338
38a90d45 2339 m |= CGROUP_CONTROLLER_TO_MASK(v);
aae7e17f 2340 }
38a90d45
LP
2341
2342 *ret = m;
aae7e17f
FB
2343 return 0;
2344}
2345
efdb0237 2346int cg_mask_supported(CGroupMask *ret) {
38a90d45 2347 CGroupMask mask;
415fc41c 2348 int r;
efdb0237
LP
2349
2350 /* Determines the mask of supported cgroup controllers. Only
2351 * includes controllers we can make sense of and that are
2352 * actually accessible. */
4ad49000 2353
b4cccbc1
LP
2354 r = cg_all_unified();
2355 if (r < 0)
2356 return r;
2357 if (r > 0) {
5f4c5fef 2358 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
efdb0237
LP
2359
2360 /* In the unified hierarchy we can read the supported
2361 * and accessible controllers from a the top-level
2362 * cgroup attribute */
2363
5f4c5fef
LP
2364 r = cg_get_root_path(&root);
2365 if (r < 0)
2366 return r;
2367
2368 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2369 if (r < 0)
2370 return r;
2371
2372 r = read_one_line_file(path, &controllers);
efdb0237
LP
2373 if (r < 0)
2374 return r;
4ad49000 2375
aae7e17f
FB
2376 r = cg_mask_from_string(controllers, &mask);
2377 if (r < 0)
2378 return r;
efdb0237 2379
03afd780 2380 /* Currently, we support the cpu, memory, io and pids controller in the unified hierarchy, mask
03a7b521 2381 * everything else off. */
03afd780 2382 mask &= CGROUP_MASK_V2;
efdb0237
LP
2383
2384 } else {
2385 CGroupController c;
2386
03afd780 2387 /* In the legacy hierarchy, we check which hierarchies are mounted. */
efdb0237 2388
38a90d45 2389 mask = 0;
efdb0237 2390 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
03afd780 2391 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
efdb0237
LP
2392 const char *n;
2393
03afd780
LP
2394 if (!FLAGS_SET(CGROUP_MASK_V1, bit))
2395 continue;
2396
efdb0237
LP
2397 n = cgroup_controller_to_string(c);
2398 if (controller_is_accessible(n) >= 0)
03afd780 2399 mask |= bit;
efdb0237 2400 }
4ad49000
LP
2401 }
2402
efdb0237
LP
2403 *ret = mask;
2404 return 0;
4ad49000 2405}
b12afc8c 2406
6925a0de
LP
2407int cg_kernel_controllers(Set **ret) {
2408 _cleanup_set_free_free_ Set *controllers = NULL;
b12afc8c 2409 _cleanup_fclose_ FILE *f = NULL;
b12afc8c
LP
2410 int r;
2411
6925a0de 2412 assert(ret);
b12afc8c 2413
f09e86bc
LS
2414 /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
2415 * and controllers that aren't currently accessible (because not mounted). This does not include "name="
2416 * pseudo-controllers. */
e155a0aa 2417
6925a0de
LP
2418 controllers = set_new(&string_hash_ops);
2419 if (!controllers)
2420 return -ENOMEM;
2421
b12afc8c
LP
2422 f = fopen("/proc/cgroups", "re");
2423 if (!f) {
6925a0de
LP
2424 if (errno == ENOENT) {
2425 *ret = NULL;
b12afc8c 2426 return 0;
6925a0de
LP
2427 }
2428
b12afc8c
LP
2429 return -errno;
2430 }
2431
35bbbf85
LP
2432 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
2433
b12afc8c 2434 /* Ignore the header line */
2351e44d 2435 (void) read_line(f, (size_t) -1, NULL);
b12afc8c
LP
2436
2437 for (;;) {
2438 char *controller;
2439 int enabled = 0;
2440
2441 errno = 0;
2442 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2443
2444 if (feof(f))
2445 break;
2446
b3267152 2447 if (ferror(f) && errno > 0)
b12afc8c
LP
2448 return -errno;
2449
2450 return -EBADMSG;
2451 }
2452
2453 if (!enabled) {
2454 free(controller);
2455 continue;
2456 }
2457
efdb0237 2458 if (!cg_controller_is_valid(controller)) {
b12afc8c
LP
2459 free(controller);
2460 return -EBADMSG;
2461 }
2462
2463 r = set_consume(controllers, controller);
2464 if (r < 0)
2465 return r;
2466 }
2467
1cc6c93a 2468 *ret = TAKE_PTR(controllers);
6925a0de 2469
b12afc8c
LP
2470 return 0;
2471}
efdb0237 2472
5da38d07
TH
2473static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2474
c22800e4
LP
2475/* The hybrid mode was initially implemented in v232 and simply mounted cgroup v2 on /sys/fs/cgroup/systemd. This
2476 * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
2477 * /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mountnbs v2 on /sys/fs/cgroup/unified and maintains
2478 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
f08e9287 2479 *
c22800e4
LP
2480 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep cgroup v2
2481 * process management but disable the compat dual layout, we return %true on
2482 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
f08e9287
TH
2483 */
2484static thread_local bool unified_systemd_v232;
2485
1fcca10e 2486static int cg_unified_update(void) {
efdb0237 2487
efdb0237
LP
2488 struct statfs fs;
2489
2490 /* Checks if we support the unified hierarchy. Returns an
2491 * error when the cgroup hierarchies aren't mounted yet or we
2492 * have any other trouble determining if the unified hierarchy
2493 * is supported. */
2494
5da38d07
TH
2495 if (unified_cache >= CGROUP_UNIFIED_NONE)
2496 return 0;
efdb0237
LP
2497
2498 if (statfs("/sys/fs/cgroup/", &fs) < 0)
c028bed1 2499 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
efdb0237 2500
9aa21133
ZJS
2501 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2502 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
5da38d07 2503 unified_cache = CGROUP_UNIFIED_ALL;
9aa21133 2504 } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2977724b 2505 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
f08e9287 2506 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
9aa21133 2507 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2977724b 2508 unified_cache = CGROUP_UNIFIED_SYSTEMD;
f08e9287 2509 unified_systemd_v232 = false;
f08e9287 2510 } else {
2977724b 2511 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
9aa21133 2512 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
5535d8f7
EV
2513
2514 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2515 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2516 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2517 unified_systemd_v232 = true;
2518 } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2519 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2520 unified_cache = CGROUP_UNIFIED_NONE;
2521 } else {
2522 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
9aa21133 2523 (unsigned long long) fs.f_type);
5535d8f7 2524 unified_cache = CGROUP_UNIFIED_NONE;
9aa21133 2525 }
2977724b 2526 }
651d47d1
ZJS
2527 } else {
2528 log_debug("Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2529 (unsigned long long) fs.f_type);
8b3aa503 2530 return -ENOMEDIUM;
651d47d1 2531 }
efdb0237 2532
5da38d07
TH
2533 return 0;
2534}
2535
c22800e4 2536int cg_unified_controller(const char *controller) {
b4cccbc1 2537 int r;
5da38d07 2538
1fcca10e 2539 r = cg_unified_update();
b4cccbc1
LP
2540 if (r < 0)
2541 return r;
5da38d07 2542
fc9ae717
LP
2543 if (unified_cache == CGROUP_UNIFIED_NONE)
2544 return false;
2545
2546 if (unified_cache >= CGROUP_UNIFIED_ALL)
2547 return true;
2548
2549 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
5da38d07
TH
2550}
2551
b4cccbc1 2552int cg_all_unified(void) {
4bb652ac
LP
2553 int r;
2554
2555 r = cg_unified_update();
2556 if (r < 0)
2557 return r;
2558
2559 return unified_cache >= CGROUP_UNIFIED_ALL;
efdb0237
LP
2560}
2561
b4cccbc1
LP
2562int cg_hybrid_unified(void) {
2563 int r;
2977724b 2564
1fcca10e 2565 r = cg_unified_update();
b4cccbc1
LP
2566 if (r < 0)
2567 return r;
2977724b 2568
f08e9287 2569 return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2977724b
TH
2570}
2571
415fc41c 2572int cg_unified_flush(void) {
5da38d07 2573 unified_cache = CGROUP_UNIFIED_UNKNOWN;
415fc41c 2574
1fcca10e 2575 return cg_unified_update();
efdb0237
LP
2576}
2577
2578int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
77fa610b 2579 _cleanup_fclose_ FILE *f = NULL;
efdb0237
LP
2580 _cleanup_free_ char *fs = NULL;
2581 CGroupController c;
415fc41c 2582 int r;
efdb0237
LP
2583
2584 assert(p);
2585
2586 if (supported == 0)
2587 return 0;
2588
b4cccbc1
LP
2589 r = cg_all_unified();
2590 if (r < 0)
2591 return r;
2592 if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */
efdb0237
LP
2593 return 0;
2594
2595 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2596 if (r < 0)
2597 return r;
2598
2599 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2600 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2601 const char *n;
2602
ab275f23
LP
2603 if (!FLAGS_SET(CGROUP_MASK_V2, bit))
2604 continue;
2605
efdb0237
LP
2606 if (!(supported & bit))
2607 continue;
2608
2609 n = cgroup_controller_to_string(c);
2610 {
2611 char s[1 + strlen(n) + 1];
2612
2613 s[0] = mask & bit ? '+' : '-';
2614 strcpy(s + 1, n);
2615
77fa610b
LP
2616 if (!f) {
2617 f = fopen(fs, "we");
2618 if (!f) {
2619 log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
2620 break;
2621 }
2622 }
2623
604028de 2624 r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
96aa6591 2625 if (r < 0) {
98e4d8d7 2626 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
96aa6591
PH
2627 clearerr(f);
2628 }
efdb0237
LP
2629 }
2630 }
2631
2632 return 0;
2633}
2634
2635bool cg_is_unified_wanted(void) {
2636 static thread_local int wanted = -1;
415fc41c 2637 int r;
1d84ad94 2638 bool b;
77fab2a9 2639 const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
efdb0237 2640
77fab2a9 2641 /* If we have a cached value, return that. */
efdb0237
LP
2642 if (wanted >= 0)
2643 return wanted;
2644
239a3d09
ZJS
2645 /* If the hierarchy is already mounted, then follow whatever
2646 * was chosen for it. */
2647 if (cg_unified_flush() >= 0)
b4cccbc1 2648 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
239a3d09 2649
77fab2a9
ZJS
2650 /* Otherwise, let's see what the kernel command line has to say.
2651 * Since checking is expensive, cache a non-error result. */
1d84ad94 2652 r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
efdb0237 2653
77fab2a9 2654 return (wanted = r > 0 ? b : is_default);
efdb0237
LP
2655}
2656
2657bool cg_is_legacy_wanted(void) {
239a3d09
ZJS
2658 static thread_local int wanted = -1;
2659
2660 /* If we have a cached value, return that. */
2661 if (wanted >= 0)
2662 return wanted;
2663
1b59cf04
ZJS
2664 /* Check if we have cgroups2 already mounted. */
2665 if (cg_unified_flush() >= 0 &&
2666 unified_cache == CGROUP_UNIFIED_ALL)
239a3d09 2667 return (wanted = false);
1b59cf04
ZJS
2668
2669 /* Otherwise, assume that at least partial legacy is wanted,
2670 * since cgroups2 should already be mounted at this point. */
239a3d09 2671 return (wanted = true);
efdb0237
LP
2672}
2673
a4464b95 2674bool cg_is_hybrid_wanted(void) {
5da38d07 2675 static thread_local int wanted = -1;
415fc41c 2676 int r;
1d84ad94 2677 bool b;
c19739db
ZJS
2678 const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2679 /* We default to true if the default is "hybrid", obviously,
2680 * but also when the default is "unified", because if we get
2681 * called, it means that unified hierarchy was not mounted. */
5da38d07 2682
77fab2a9 2683 /* If we have a cached value, return that. */
5da38d07
TH
2684 if (wanted >= 0)
2685 return wanted;
2686
239a3d09
ZJS
2687 /* If the hierarchy is already mounted, then follow whatever
2688 * was chosen for it. */
2689 if (cg_unified_flush() >= 0 &&
2690 unified_cache == CGROUP_UNIFIED_ALL)
2691 return (wanted = false);
2692
77fab2a9
ZJS
2693 /* Otherwise, let's see what the kernel command line has to say.
2694 * Since checking is expensive, cache a non-error result. */
1d84ad94 2695 r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
5da38d07 2696
2dcb526d
ZJS
2697 /* The meaning of the kernel option is reversed wrt. to the return value
2698 * of this function, hence the negation. */
77fab2a9 2699 return (wanted = r > 0 ? !b : is_default);
5da38d07
TH
2700}
2701
13c31542
TH
2702int cg_weight_parse(const char *s, uint64_t *ret) {
2703 uint64_t u;
2704 int r;
2705
2706 if (isempty(s)) {
2707 *ret = CGROUP_WEIGHT_INVALID;
2708 return 0;
2709 }
2710
2711 r = safe_atou64(s, &u);
2712 if (r < 0)
2713 return r;
2714
2715 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2716 return -ERANGE;
2717
2718 *ret = u;
2719 return 0;
2720}
2721
9be57249
TH
2722const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2723 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2724 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
ac06a0cf
TH
2725 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2726 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
9be57249
TH
2727};
2728
2729static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2730 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2731 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
ac06a0cf
TH
2732 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2733 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
9be57249
TH
2734};
2735
2736DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2737
d53d9474
LP
2738int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2739 uint64_t u;
2740 int r;
2741
2742 if (isempty(s)) {
2743 *ret = CGROUP_CPU_SHARES_INVALID;
2744 return 0;
2745 }
2746
2747 r = safe_atou64(s, &u);
2748 if (r < 0)
2749 return r;
2750
2751 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2752 return -ERANGE;
2753
2754 *ret = u;
2755 return 0;
2756}
2757
2758int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2759 uint64_t u;
2760 int r;
2761
2762 if (isempty(s)) {
2763 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2764 return 0;
2765 }
2766
2767 r = safe_atou64(s, &u);
2768 if (r < 0)
2769 return r;
2770
2771 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2772 return -ERANGE;
2773
2774 *ret = u;
2775 return 0;
2776}
2777
f0bef277
EV
2778bool is_cgroup_fs(const struct statfs *s) {
2779 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2780 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2781}
2782
/* Returns true if the file descriptor refers to a file system that is_cgroup_fs() recognizes as a cgroup
 * file system. Returns false if it does not, or if fstatfs() fails on the descriptor. */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        /* The return type is bool, hence a negative errno cannot be propagated from here: "return -errno"
         * would be converted to true, and thus claim that an fd we couldn't even query is on a cgroup file
         * system. Report false instead. */
        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2791
efdb0237
LP
2792static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2793 [CGROUP_CONTROLLER_CPU] = "cpu",
2794 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
13c31542 2795 [CGROUP_CONTROLLER_IO] = "io",
efdb0237
LP
2796 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2797 [CGROUP_CONTROLLER_MEMORY] = "memory",
3905f127 2798 [CGROUP_CONTROLLER_DEVICES] = "devices",
03a7b521 2799 [CGROUP_CONTROLLER_PIDS] = "pids",
17f14955 2800 [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
084c7007 2801 [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
efdb0237
LP
2802};
2803
2804DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);