]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/cgroup-util.c
util: use extract_first_word() instead of strsep()
[thirdparty/systemd.git] / src / basic / cgroup-util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
8c6db833 2
84ac7bea 3#include <dirent.h>
8c6db833 4#include <errno.h>
84ac7bea 5#include <ftw.h>
11c3a366 6#include <limits.h>
8c6db833 7#include <signal.h>
11c3a366 8#include <stddef.h>
8c6db833 9#include <stdlib.h>
84ac7bea 10#include <string.h>
672c48cc 11#include <sys/stat.h>
11c3a366 12#include <sys/statfs.h>
672c48cc 13#include <sys/types.h>
f98c2585 14#include <sys/utsname.h>
4b58153d 15#include <sys/xattr.h>
84ac7bea 16#include <unistd.h>
8c6db833 17
b5efdb8a 18#include "alloc-util.h"
3ffd4af2 19#include "cgroup-util.h"
93cc7779 20#include "def.h"
a0956174 21#include "dirent-util.h"
84ac7bea 22#include "extract-word.h"
3ffd4af2 23#include "fd-util.h"
84ac7bea 24#include "fileio.h"
f97b34a6 25#include "format-util.h"
f4f15635 26#include "fs-util.h"
93cc7779 27#include "log.h"
84ac7bea
LP
28#include "login-util.h"
29#include "macro.h"
93cc7779 30#include "missing.h"
84ac7bea 31#include "mkdir.h"
6bedfcbb 32#include "parse-util.h"
9eb977db 33#include "path-util.h"
872a590e 34#include "proc-cmdline.h"
84ac7bea
LP
35#include "process-util.h"
36#include "set.h"
9444b1f2 37#include "special.h"
872a590e 38#include "stat-util.h"
d054f0a4 39#include "stdio-util.h"
8b43440b 40#include "string-table.h"
07630cea 41#include "string-util.h"
aae7e17f 42#include "strv.h"
84ac7bea 43#include "unit-name.h"
b1d4f8e1 44#include "user-util.h"
8c6db833 45
c6c18be3 46int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
7027ff61 47 _cleanup_free_ char *fs = NULL;
c6c18be3 48 FILE *f;
7027ff61 49 int r;
c6c18be3 50
c6c18be3
LP
51 assert(_f);
52
c3175a7f
LP
53 r = cg_get_path(controller, path, "cgroup.procs", &fs);
54 if (r < 0)
c6c18be3
LP
55 return r;
56
57 f = fopen(fs, "re");
c6c18be3
LP
58 if (!f)
59 return -errno;
60
61 *_f = f;
62 return 0;
63}
64
c6c18be3
LP
/* Reads the next PID from a stream opened on a cgroup.procs file.
 *
 * Returns 1 and stores the PID in *_pid if one was parsed, 0 on end of file, and a negative
 * errno-style value on failure. -EIO is returned if the entry cannot be parsed, is zero, or does
 * not fit into a (positive) pid_t.
 *
 * Note that the cgroup.procs might contain duplicates! See cgroups.txt for details. */
int cg_read_pid(FILE *f, pid_t *_pid) {
        unsigned long ul;
        pid_t pid;

        assert(f);
        assert(_pid);

        errno = 0;
        if (fscanf(f, "%lu", &ul) != 1) {

                if (feof(f))
                        return 0;

                return errno > 0 ? -errno : -EIO;
        }

        /* Refuse zero as well as any value that does not survive the round-trip through pid_t;
         * otherwise an oversized number would be handed out truncated or negative. */
        pid = (pid_t) ul;
        if (pid <= 0 || (unsigned long) pid != ul)
                return -EIO;

        *_pid = pid;
        return 1;
}
89
8b238b13
LP
90int cg_read_event(
91 const char *controller,
92 const char *path,
93 const char *event,
31a9be23 94 char **ret) {
8b238b13 95
ab2c3861 96 _cleanup_free_ char *events = NULL, *content = NULL;
ab2c3861
TH
97 int r;
98
99 r = cg_get_path(controller, path, "cgroup.events", &events);
100 if (r < 0)
101 return r;
102
103 r = read_full_file(events, &content, NULL);
104 if (r < 0)
105 return r;
106
31a9be23
YW
107 for (const char *p = content;;) {
108 _cleanup_free_ char *line = NULL, *key = NULL, *val = NULL;
109 const char *q;
110
111 r = extract_first_word(&p, &line, "\n", 0);
112 if (r < 0)
113 return r;
114 if (r == 0)
115 return -ENOENT;
116
117 q = line;
118 r = extract_first_word(&q, &key, " ", 0);
119 if (r < 0)
120 return r;
121 if (r == 0)
ab2c3861
TH
122 return -EINVAL;
123
31a9be23 124 if (!streq(key, event))
ab2c3861
TH
125 continue;
126
31a9be23
YW
127 val = strdup(q);
128 if (!val)
129 return -ENOMEM;
130
131 *ret = TAKE_PTR(val);
ab2c3861
TH
132 return 0;
133 }
ab2c3861
TH
134}
135
3228995c
CB
136bool cg_ns_supported(void) {
137 static thread_local int enabled = -1;
138
139 if (enabled >= 0)
140 return enabled;
141
0887fa71
LP
142 if (access("/proc/self/ns/cgroup", F_OK) < 0) {
143 if (errno != ENOENT)
144 log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
145 enabled = false;
146 } else
147 enabled = true;
3228995c
CB
148
149 return enabled;
150}
151
35d2e7ec 152int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
7027ff61 153 _cleanup_free_ char *fs = NULL;
35d2e7ec
LP
154 int r;
155 DIR *d;
156
35d2e7ec
LP
157 assert(_d);
158
159 /* This is not recursive! */
160
c3175a7f
LP
161 r = cg_get_path(controller, path, NULL, &fs);
162 if (r < 0)
35d2e7ec
LP
163 return r;
164
165 d = opendir(fs);
35d2e7ec
LP
166 if (!d)
167 return -errno;
168
169 *_d = d;
170 return 0;
171}
172
173int cg_read_subgroup(DIR *d, char **fn) {
174 struct dirent *de;
175
176 assert(d);
7027ff61 177 assert(fn);
35d2e7ec 178
f01327ad 179 FOREACH_DIRENT_ALL(de, d, return -errno) {
35d2e7ec
LP
180 char *b;
181
182 if (de->d_type != DT_DIR)
183 continue;
184
49bfc877 185 if (dot_or_dot_dot(de->d_name))
35d2e7ec
LP
186 continue;
187
7027ff61
LP
188 b = strdup(de->d_name);
189 if (!b)
35d2e7ec
LP
190 return -ENOMEM;
191
192 *fn = b;
193 return 1;
194 }
195
35d2e7ec
LP
196 return 0;
197}
198
4ad49000 199int cg_rmdir(const char *controller, const char *path) {
7027ff61 200 _cleanup_free_ char *p = NULL;
35d2e7ec
LP
201 int r;
202
ad293f5a
LP
203 r = cg_get_path(controller, path, NULL, &p);
204 if (r < 0)
35d2e7ec
LP
205 return r;
206
207 r = rmdir(p);
7027ff61
LP
208 if (r < 0 && errno != ENOENT)
209 return -errno;
35d2e7ec 210
b4cccbc1 211 r = cg_hybrid_unified();
f20db199 212 if (r <= 0)
b4cccbc1 213 return r;
b4cccbc1
LP
214
215 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
216 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
217 if (r < 0)
218 log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
219 }
220
7027ff61 221 return 0;
35d2e7ec
LP
222}
223
1d98fef1
LP
224int cg_kill(
225 const char *controller,
226 const char *path,
227 int sig,
228 CGroupFlags flags,
229 Set *s,
230 cg_kill_log_func_t log_kill,
231 void *userdata) {
232
7027ff61 233 _cleanup_set_free_ Set *allocated_set = NULL;
35d2e7ec 234 bool done = false;
c53d2d54 235 int r, ret = 0, ret_log_kill = 0;
35d2e7ec 236 pid_t my_pid;
8c6db833 237
8c6db833
LP
238 assert(sig >= 0);
239
0d5b4810
LP
240 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
241 * SIGCONT on SIGKILL. */
242 if (IN_SET(sig, SIGCONT, SIGKILL))
243 flags &= ~CGROUP_SIGCONT;
244
8c6db833
LP
245 /* This goes through the tasks list and kills them all. This
246 * is repeated until no further processes are added to the
247 * tasks list, to properly handle forking processes */
248
7027ff61 249 if (!s) {
d5099efc 250 s = allocated_set = set_new(NULL);
7027ff61 251 if (!s)
ca949c9d 252 return -ENOMEM;
7027ff61 253 }
8c6db833 254
df0ff127 255 my_pid = getpid_cached();
8c6db833
LP
256
257 do {
7027ff61 258 _cleanup_fclose_ FILE *f = NULL;
0b172489 259 pid_t pid = 0;
8c6db833
LP
260 done = true;
261
7027ff61
LP
262 r = cg_enumerate_processes(controller, path, &f);
263 if (r < 0) {
4c633005 264 if (ret >= 0 && r != -ENOENT)
7027ff61 265 return r;
35d2e7ec 266
7027ff61 267 return ret;
35d2e7ec 268 }
c6c18be3
LP
269
270 while ((r = cg_read_pid(f, &pid)) > 0) {
8c6db833 271
1d98fef1 272 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
c6c18be3 273 continue;
8c6db833 274
fea72cc0 275 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
c6c18be3 276 continue;
8c6db833 277
1d98fef1 278 if (log_kill)
c53d2d54 279 ret_log_kill = log_kill(pid, sig, userdata);
1d98fef1 280
8c6db833
LP
281 /* If we haven't killed this process yet, kill
282 * it */
4c633005
LP
283 if (kill(pid, sig) < 0) {
284 if (ret >= 0 && errno != ESRCH)
8c6db833 285 ret = -errno;
6e8314c4 286 } else {
1d98fef1 287 if (flags & CGROUP_SIGCONT)
e155a0aa 288 (void) kill(pid, SIGCONT);
430c18ed 289
c53d2d54
DB
290 if (ret == 0) {
291 if (log_kill)
292 ret = ret_log_kill;
293 else
294 ret = 1;
295 }
430c18ed 296 }
8c6db833 297
8c6db833
LP
298 done = false;
299
fea72cc0 300 r = set_put(s, PID_TO_PTR(pid));
7027ff61 301 if (r < 0) {
35d2e7ec 302 if (ret >= 0)
7027ff61 303 return r;
35d2e7ec 304
7027ff61 305 return ret;
35d2e7ec
LP
306 }
307 }
308
309 if (r < 0) {
310 if (ret >= 0)
7027ff61 311 return r;
35d2e7ec 312
7027ff61 313 return ret;
8c6db833
LP
314 }
315
8c6db833
LP
316 /* To avoid racing against processes which fork
317 * quicker than we can kill them we repeat this until
318 * no new pids need to be killed. */
319
35d2e7ec 320 } while (!done);
8c6db833 321
35d2e7ec 322 return ret;
8c6db833
LP
323}
324
1d98fef1
LP
325int cg_kill_recursive(
326 const char *controller,
327 const char *path,
328 int sig,
329 CGroupFlags flags,
330 Set *s,
331 cg_kill_log_func_t log_kill,
332 void *userdata) {
333
7027ff61
LP
334 _cleanup_set_free_ Set *allocated_set = NULL;
335 _cleanup_closedir_ DIR *d = NULL;
e155a0aa 336 int r, ret;
35d2e7ec 337 char *fn;
8c6db833
LP
338
339 assert(path);
8c6db833
LP
340 assert(sig >= 0);
341
7027ff61 342 if (!s) {
d5099efc 343 s = allocated_set = set_new(NULL);
7027ff61 344 if (!s)
ca949c9d 345 return -ENOMEM;
7027ff61 346 }
ca949c9d 347
1d98fef1 348 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
8c6db833 349
7027ff61
LP
350 r = cg_enumerate_subgroups(controller, path, &d);
351 if (r < 0) {
4c633005 352 if (ret >= 0 && r != -ENOENT)
7027ff61 353 return r;
8c6db833 354
7027ff61 355 return ret;
35d2e7ec 356 }
8c6db833 357
35d2e7ec 358 while ((r = cg_read_subgroup(d, &fn)) > 0) {
7027ff61 359 _cleanup_free_ char *p = NULL;
8c6db833 360
605405c6 361 p = strjoin(path, "/", fn);
35d2e7ec 362 free(fn);
7027ff61
LP
363 if (!p)
364 return -ENOMEM;
8c6db833 365
1d98fef1 366 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
e155a0aa 367 if (r != 0 && ret >= 0)
35d2e7ec 368 ret = r;
8c6db833 369 }
7027ff61 370 if (ret >= 0 && r < 0)
35d2e7ec
LP
371 ret = r;
372
1d98fef1 373 if (flags & CGROUP_REMOVE) {
4ad49000 374 r = cg_rmdir(controller, path);
4c701096 375 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
7027ff61
LP
376 return r;
377 }
ca949c9d 378
8c6db833
LP
379 return ret;
380}
381
1d98fef1
LP
382int cg_migrate(
383 const char *cfrom,
384 const char *pfrom,
385 const char *cto,
386 const char *pto,
387 CGroupFlags flags) {
388
35d2e7ec 389 bool done = false;
246aa6dd 390 _cleanup_set_free_ Set *s = NULL;
8c6db833
LP
391 int r, ret = 0;
392 pid_t my_pid;
393
246aa6dd
LP
394 assert(cfrom);
395 assert(pfrom);
396 assert(cto);
397 assert(pto);
8c6db833 398
d5099efc 399 s = set_new(NULL);
246aa6dd 400 if (!s)
35d2e7ec
LP
401 return -ENOMEM;
402
df0ff127 403 my_pid = getpid_cached();
8c6db833
LP
404
405 do {
7027ff61 406 _cleanup_fclose_ FILE *f = NULL;
0b172489 407 pid_t pid = 0;
8c6db833
LP
408 done = true;
409
b043cd0b 410 r = cg_enumerate_processes(cfrom, pfrom, &f);
246aa6dd 411 if (r < 0) {
4c633005 412 if (ret >= 0 && r != -ENOENT)
7027ff61 413 return r;
35d2e7ec 414
246aa6dd 415 return ret;
35d2e7ec 416 }
c6c18be3
LP
417
418 while ((r = cg_read_pid(f, &pid)) > 0) {
8c6db833 419
35d2e7ec
LP
420 /* This might do weird stuff if we aren't a
421 * single-threaded program. However, we
422 * luckily know we are not */
1d98fef1 423 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
c6c18be3 424 continue;
8c6db833 425
fea72cc0 426 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
35d2e7ec
LP
427 continue;
428
9b84c7f9
LP
429 /* Ignore kernel threads. Since they can only
430 * exist in the root cgroup, we only check for
431 * them there. */
432 if (cfrom &&
57ea45e1 433 empty_or_root(pfrom) &&
9b84c7f9
LP
434 is_kernel_thread(pid) > 0)
435 continue;
436
246aa6dd
LP
437 r = cg_attach(cto, pto, pid);
438 if (r < 0) {
4c633005 439 if (ret >= 0 && r != -ESRCH)
35d2e7ec
LP
440 ret = r;
441 } else if (ret == 0)
442 ret = 1;
8c6db833 443
8c6db833 444 done = false;
35d2e7ec 445
fea72cc0 446 r = set_put(s, PID_TO_PTR(pid));
246aa6dd 447 if (r < 0) {
35d2e7ec 448 if (ret >= 0)
7027ff61 449 return r;
35d2e7ec 450
246aa6dd 451 return ret;
35d2e7ec
LP
452 }
453 }
454
455 if (r < 0) {
456 if (ret >= 0)
7027ff61 457 return r;
35d2e7ec 458
246aa6dd 459 return ret;
8c6db833 460 }
35d2e7ec 461 } while (!done);
8c6db833 462
35d2e7ec 463 return ret;
8c6db833
LP
464}
465
4ad49000
LP
466int cg_migrate_recursive(
467 const char *cfrom,
468 const char *pfrom,
469 const char *cto,
470 const char *pto,
1d98fef1 471 CGroupFlags flags) {
4ad49000 472
246aa6dd 473 _cleanup_closedir_ DIR *d = NULL;
7027ff61 474 int r, ret = 0;
35d2e7ec 475 char *fn;
8c6db833 476
246aa6dd
LP
477 assert(cfrom);
478 assert(pfrom);
479 assert(cto);
480 assert(pto);
8c6db833 481
1d98fef1 482 ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
8c6db833 483
246aa6dd
LP
484 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
485 if (r < 0) {
4c633005 486 if (ret >= 0 && r != -ENOENT)
7027ff61
LP
487 return r;
488
246aa6dd 489 return ret;
35d2e7ec
LP
490 }
491
492 while ((r = cg_read_subgroup(d, &fn)) > 0) {
246aa6dd 493 _cleanup_free_ char *p = NULL;
8c6db833 494
605405c6 495 p = strjoin(pfrom, "/", fn);
35d2e7ec 496 free(fn);
e155a0aa
LP
497 if (!p)
498 return -ENOMEM;
8c6db833 499
1d98fef1 500 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
35d2e7ec
LP
501 if (r != 0 && ret >= 0)
502 ret = r;
8c6db833
LP
503 }
504
35d2e7ec
LP
505 if (r < 0 && ret >= 0)
506 ret = r;
507
1d98fef1 508 if (flags & CGROUP_REMOVE) {
4ad49000 509 r = cg_rmdir(cfrom, pfrom);
4c701096 510 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
246aa6dd
LP
511 return r;
512 }
8c6db833
LP
513
514 return ret;
515}
516
13b84ec7
LP
517int cg_migrate_recursive_fallback(
518 const char *cfrom,
519 const char *pfrom,
520 const char *cto,
521 const char *pto,
1d98fef1 522 CGroupFlags flags) {
13b84ec7
LP
523
524 int r;
525
526 assert(cfrom);
527 assert(pfrom);
528 assert(cto);
529 assert(pto);
530
1d98fef1 531 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
13b84ec7
LP
532 if (r < 0) {
533 char prefix[strlen(pto) + 1];
534
535 /* This didn't work? Then let's try all prefixes of the destination */
536
fecffe5d 537 PATH_FOREACH_PREFIX(prefix, pto) {
e155a0aa
LP
538 int q;
539
1d98fef1 540 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
e155a0aa
LP
541 if (q >= 0)
542 return q;
13b84ec7
LP
543 }
544 }
545
e155a0aa 546 return r;
13b84ec7
LP
547}
548
efdb0237
LP
549static const char *controller_to_dirname(const char *controller) {
550 const char *e;
3474ae3c 551
7027ff61
LP
552 assert(controller);
553
efdb0237
LP
554 /* Converts a controller name to the directory name below
555 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
556 * just cuts off the name= prefixed used for named
557 * hierarchies, if it is specified. */
558
2977724b 559 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
b4cccbc1 560 if (cg_hybrid_unified() > 0)
2977724b
TH
561 controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
562 else
563 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
564 }
b6629c4b 565
efdb0237
LP
566 e = startswith(controller, "name=");
567 if (e)
568 return e;
569
570 return controller;
3474ae3c
LP
571}
572
569b19d8
LP
573static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
574 const char *dn;
018ef268 575 char *t = NULL;
3474ae3c 576
efdb0237 577 assert(fs);
569b19d8
LP
578 assert(controller);
579
580 dn = controller_to_dirname(controller);
efdb0237
LP
581
582 if (isempty(path) && isempty(suffix))
569b19d8 583 t = strappend("/sys/fs/cgroup/", dn);
efdb0237 584 else if (isempty(path))
605405c6 585 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
efdb0237 586 else if (isempty(suffix))
605405c6 587 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
efdb0237 588 else
605405c6 589 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
efdb0237
LP
590 if (!t)
591 return -ENOMEM;
3474ae3c 592
efdb0237
LP
593 *fs = t;
594 return 0;
595}
596
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]". Empty or NULL 'path'/'suffix' components are
 * left out.
 *
 * Stores the newly allocated string in *fs and returns 0, or returns -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        char *joined;

        assert(fs);

        if (isempty(path)) {
                if (isempty(suffix))
                        joined = strdup("/sys/fs/cgroup");
                else
                        joined = strappend("/sys/fs/cgroup/", suffix);
        } else {
                if (isempty(suffix))
                        joined = strappend("/sys/fs/cgroup/", path);
                else
                        joined = strjoin("/sys/fs/cgroup/", path, "/", suffix);
        }
        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
616
/* Computes the file system path for a cgroup and an optional attribute file in it.
 *
 * With a NULL controller the result is just 'path' and 'suffix' joined, i.e. the path *below*
 * the controllers, without any prefix; at least one of the two must be given then. Otherwise
 * the controller name is validated and the path is built for the unified or the legacy layout,
 * depending on cg_all_unified(). The result is passed through path_simplify().
 *
 * Stores the newly allocated path in *fs and returns 0, or returns a negative errno-style value
 * (-EINVAL for bad arguments, -ENOMEM on allocation failure). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        int r;

        assert(fs);

        if (!controller) {
                char *joined;

                if (!path && !suffix)
                        return -EINVAL;

                if (path && suffix)
                        joined = strjoin(path, "/", suffix);
                else if (path)
                        joined = strdup(path);
                else
                        joined = strdup(suffix);
                if (!joined)
                        return -ENOMEM;

                *fs = path_simplify(joined, false);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified();
        if (r < 0)
                return r;

        r = r > 0 ?
                join_path_unified(path, suffix, fs) :
                join_path_legacy(controller, path, suffix, fs);
        if (r < 0)
                return r;

        path_simplify(*fs, false);
        return 0;
}
dbd821ac 660
efdb0237 661static int controller_is_accessible(const char *controller) {
b4cccbc1 662 int r;
37099707 663
efdb0237 664 assert(controller);
37099707 665
efdb0237
LP
666 /* Checks whether a specific controller is accessible,
667 * i.e. its hierarchy mounted. In the unified hierarchy all
668 * controllers are considered accessible, except for the named
669 * hierarchies */
b12afc8c 670
efdb0237
LP
671 if (!cg_controller_is_valid(controller))
672 return -EINVAL;
673
b4cccbc1
LP
674 r = cg_all_unified();
675 if (r < 0)
676 return r;
677 if (r > 0) {
efdb0237
LP
678 /* We don't support named hierarchies if we are using
679 * the unified hierarchy. */
680
681 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
682 return 0;
683
684 if (startswith(controller, "name="))
685 return -EOPNOTSUPP;
686
687 } else {
688 const char *cc, *dn;
689
690 dn = controller_to_dirname(controller);
691 cc = strjoina("/sys/fs/cgroup/", dn);
692
693 if (laccess(cc, F_OK) < 0)
694 return -errno;
695 }
37099707
LP
696
697 return 0;
698}
699
/* Same as cg_get_path(), but first verifies via controller_is_accessible() that the controller
 * can actually be used. Unlike cg_get_path(), a controller must be specified. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int accessible;

        assert(controller);
        assert(fs);

        accessible = controller_is_accessible(controller);
        if (accessible < 0)
                return accessible;

        return cg_get_path(controller, path, suffix, fs);
}
713
e27796a0 714static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
4ad49000
LP
715 assert(path);
716 assert(sb);
717 assert(ftwbuf);
e27796a0
LP
718
719 if (typeflag != FTW_DP)
720 return 0;
721
722 if (ftwbuf->level < 1)
723 return 0;
724
e155a0aa 725 (void) rmdir(path);
e27796a0
LP
726 return 0;
727}
728
8c6db833 729int cg_trim(const char *controller, const char *path, bool delete_root) {
7027ff61 730 _cleanup_free_ char *fs = NULL;
2977724b 731 int r = 0, q;
8c6db833 732
8c6db833
LP
733 assert(path);
734
e27796a0
LP
735 r = cg_get_path(controller, path, NULL, &fs);
736 if (r < 0)
8c6db833
LP
737 return r;
738
e27796a0 739 errno = 0;
e155a0aa
LP
740 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
741 if (errno == ENOENT)
742 r = 0;
b3267152 743 else if (errno > 0)
e155a0aa
LP
744 r = -errno;
745 else
746 r = -EIO;
747 }
e27796a0
LP
748
749 if (delete_root) {
4ad49000
LP
750 if (rmdir(fs) < 0 && errno != ENOENT)
751 return -errno;
e27796a0
LP
752 }
753
b4cccbc1
LP
754 q = cg_hybrid_unified();
755 if (q < 0)
756 return q;
757 if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
758 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
759 if (q < 0)
760 log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
761 }
762
e27796a0 763 return r;
8c6db833
LP
764}
765
65be7e06
ZJS
766/* Create a cgroup in the hierarchy of controller.
767 * Returns 0 if the group already existed, 1 on success, negative otherwise.
768 */
1434ae6f
LP
769int cg_create(const char *controller, const char *path) {
770 _cleanup_free_ char *fs = NULL;
771 int r;
772
773 r = cg_get_path_and_check(controller, path, NULL, &fs);
774 if (r < 0)
775 return r;
776
777 r = mkdir_parents(fs, 0755);
778 if (r < 0)
779 return r;
780
dae8b82e
ZJS
781 r = mkdir_errno_wrapper(fs, 0755);
782 if (r == -EEXIST)
783 return 0;
784 if (r < 0)
785 return r;
1434ae6f 786
b4cccbc1
LP
787 r = cg_hybrid_unified();
788 if (r < 0)
789 return r;
790
791 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
792 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
793 if (r < 0)
794 log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
795 }
796
1434ae6f
LP
797 return 1;
798}
799
/* Creates the cgroup and then attaches 'pid' to it. Returns the result of cg_create() on
 * success, or a negative errno-style value if either step fails. This does not remove the
 * cgroup again if attaching fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);
        if (attached < 0)
                return attached;

        return created;
}
816
8c6db833 817int cg_attach(const char *controller, const char *path, pid_t pid) {
574d5f2d
LP
818 _cleanup_free_ char *fs = NULL;
819 char c[DECIMAL_STR_MAX(pid_t) + 2];
8c6db833
LP
820 int r;
821
8c6db833
LP
822 assert(path);
823 assert(pid >= 0);
824
b043cd0b 825 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
3474ae3c 826 if (r < 0)
c6c18be3 827 return r;
8c6db833
LP
828
829 if (pid == 0)
df0ff127 830 pid = getpid_cached();
8c6db833 831
d054f0a4 832 xsprintf(c, PID_FMT "\n", pid);
8c6db833 833
604028de 834 r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
2977724b
TH
835 if (r < 0)
836 return r;
837
b4cccbc1
LP
838 r = cg_hybrid_unified();
839 if (r < 0)
840 return r;
841
842 if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
843 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
844 if (r < 0)
bd68e99b 845 log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
2977724b
TH
846 }
847
848 return 0;
8c6db833
LP
849}
850
13b84ec7
LP
851int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
852 int r;
853
854 assert(controller);
855 assert(path);
856 assert(pid >= 0);
857
858 r = cg_attach(controller, path, pid);
859 if (r < 0) {
860 char prefix[strlen(path) + 1];
861
862 /* This didn't work? Then let's try all prefixes of
863 * the destination */
864
fecffe5d 865 PATH_FOREACH_PREFIX(prefix, path) {
e155a0aa
LP
866 int q;
867
868 q = cg_attach(controller, prefix, pid);
869 if (q >= 0)
870 return q;
13b84ec7
LP
871 }
872 }
873
e155a0aa 874 return r;
13b84ec7
LP
875}
876
62b9bb26 877int cg_set_access(
2d76d14e
LP
878 const char *controller,
879 const char *path,
2d76d14e
LP
880 uid_t uid,
881 gid_t gid) {
882
62b9bb26
LP
883 struct Attribute {
884 const char *name;
885 bool fatal;
886 };
887
4e1dfa45 888 /* cgroup v1, aka legacy/non-unified */
62b9bb26
LP
889 static const struct Attribute legacy_attributes[] = {
890 { "cgroup.procs", true },
891 { "tasks", false },
892 { "cgroup.clone_children", false },
893 {},
894 };
895
4e1dfa45 896 /* cgroup v2, aka unified */
62b9bb26
LP
897 static const struct Attribute unified_attributes[] = {
898 { "cgroup.procs", true },
899 { "cgroup.subtree_control", true },
900 { "cgroup.threads", false },
901 {},
902 };
903
904 static const struct Attribute* const attributes[] = {
905 [false] = legacy_attributes,
906 [true] = unified_attributes,
907 };
974efc46 908
40853aa5 909 _cleanup_free_ char *fs = NULL;
62b9bb26
LP
910 const struct Attribute *i;
911 int r, unified;
8c6db833 912
8c6db833
LP
913 assert(path);
914
62b9bb26 915 if (uid == UID_INVALID && gid == GID_INVALID)
8d53b453
LP
916 return 0;
917
62b9bb26
LP
918 unified = cg_unified_controller(controller);
919 if (unified < 0)
920 return unified;
8c6db833 921
62b9bb26
LP
922 /* Configure access to the cgroup itself */
923 r = cg_get_path(controller, path, NULL, &fs);
974efc46
LP
924 if (r < 0)
925 return r;
8c6db833 926
62b9bb26 927 r = chmod_and_chown(fs, 0755, uid, gid);
b4cccbc1
LP
928 if (r < 0)
929 return r;
40853aa5 930
62b9bb26
LP
931 /* Configure access to the cgroup's attributes */
932 for (i = attributes[unified]; i->name; i++) {
40853aa5 933 fs = mfree(fs);
40853aa5 934
62b9bb26 935 r = cg_get_path(controller, path, i->name, &fs);
40853aa5
LP
936 if (r < 0)
937 return r;
efdb0237 938
62b9bb26
LP
939 r = chmod_and_chown(fs, 0644, uid, gid);
940 if (r < 0) {
941 if (i->fatal)
942 return r;
5beac75e 943
62b9bb26
LP
944 log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
945 }
946 }
947
948 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
949 r = cg_hybrid_unified();
2977724b 950 if (r < 0)
62b9bb26
LP
951 return r;
952 if (r > 0) {
953 /* Always propagate access mode from unified to legacy controller */
954 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
955 if (r < 0)
956 log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
957 }
2977724b 958 }
974efc46 959
efdb0237 960 return 0;
8c6db833
LP
961}
962
4b58153d
LP
963int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
964 _cleanup_free_ char *fs = NULL;
965 int r;
966
967 assert(path);
968 assert(name);
969 assert(value || size <= 0);
970
971 r = cg_get_path(controller, path, NULL, &fs);
972 if (r < 0)
973 return r;
974
975 if (setxattr(fs, name, value, size, flags) < 0)
976 return -errno;
977
978 return 0;
979}
980
981int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
982 _cleanup_free_ char *fs = NULL;
983 ssize_t n;
984 int r;
985
986 assert(path);
987 assert(name);
988
989 r = cg_get_path(controller, path, NULL, &fs);
990 if (r < 0)
991 return r;
992
993 n = getxattr(fs, name, value, size);
994 if (n < 0)
995 return -errno;
996
997 return (int) n;
998}
999
7027ff61 1000int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
7027ff61 1001 _cleanup_fclose_ FILE *f = NULL;
b6629c4b 1002 const char *fs, *controller_str;
d2b39cb6 1003 int unified, r;
efdb0237 1004 size_t cs = 0;
8c6db833 1005
8c6db833 1006 assert(path);
c6c18be3 1007 assert(pid >= 0);
8c6db833 1008
5da38d07
TH
1009 if (controller) {
1010 if (!cg_controller_is_valid(controller))
1011 return -EINVAL;
1012 } else
1013 controller = SYSTEMD_CGROUP_CONTROLLER;
1014
c22800e4 1015 unified = cg_unified_controller(controller);
b4cccbc1
LP
1016 if (unified < 0)
1017 return unified;
1018 if (unified == 0) {
b6629c4b
TH
1019 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
1020 controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
1021 else
1022 controller_str = controller;
1023
1024 cs = strlen(controller_str);
1025 }
7027ff61 1026
b68fa010 1027 fs = procfs_file_alloca(pid, "cgroup");
fdeea3f4
ZJS
1028 r = fopen_unlocked(fs, "re", &f);
1029 if (r == -ENOENT)
1030 return -ESRCH;
1031 if (r < 0)
1032 return r;
35bbbf85 1033
d2b39cb6
LP
1034 for (;;) {
1035 _cleanup_free_ char *line = NULL;
efdb0237 1036 char *e, *p;
c6c18be3 1037
d2b39cb6
LP
1038 r = read_line(f, LONG_LINE_MAX, &line);
1039 if (r < 0)
1040 return r;
1041 if (r == 0)
1042 break;
c6c18be3 1043
efdb0237
LP
1044 if (unified) {
1045 e = startswith(line, "0:");
1046 if (!e)
1047 continue;
c6c18be3 1048
efdb0237
LP
1049 e = strchr(e, ':');
1050 if (!e)
1051 continue;
1052 } else {
1053 char *l;
1054 size_t k;
1055 const char *word, *state;
1056 bool found = false;
1057
1058 l = strchr(line, ':');
1059 if (!l)
1060 continue;
8af8afd6 1061
efdb0237
LP
1062 l++;
1063 e = strchr(l, ':');
1064 if (!e)
1065 continue;
8af8afd6 1066
efdb0237 1067 *e = 0;
00d4b1e6 1068 FOREACH_WORD_SEPARATOR(word, k, l, ",", state)
b6629c4b 1069 if (k == cs && memcmp(word, controller_str, cs) == 0) {
efdb0237
LP
1070 found = true;
1071 break;
1072 }
efdb0237
LP
1073 if (!found)
1074 continue;
8af8afd6
LP
1075 }
1076
8af8afd6 1077 p = strdup(e + 1);
7027ff61
LP
1078 if (!p)
1079 return -ENOMEM;
c6c18be3 1080
5e20b0a4
LP
1081 /* Truncate suffix indicating the process is a zombie */
1082 e = endswith(p, " (deleted)");
1083 if (e)
1084 *e = 0;
1085
c6c18be3 1086 *path = p;
7027ff61 1087 return 0;
c6c18be3
LP
1088 }
1089
1c80e425 1090 return -ENODATA;
8c6db833
LP
1091}
1092
1093int cg_install_release_agent(const char *controller, const char *agent) {
7027ff61 1094 _cleanup_free_ char *fs = NULL, *contents = NULL;
efdb0237 1095 const char *sc;
415fc41c 1096 int r;
8c6db833 1097
8c6db833
LP
1098 assert(agent);
1099
c22800e4 1100 r = cg_unified_controller(controller);
b4cccbc1
LP
1101 if (r < 0)
1102 return r;
1103 if (r > 0) /* doesn't apply to unified hierarchy */
efdb0237
LP
1104 return -EOPNOTSUPP;
1105
7027ff61
LP
1106 r = cg_get_path(controller, NULL, "release_agent", &fs);
1107 if (r < 0)
c6c18be3 1108 return r;
8c6db833 1109
7027ff61
LP
1110 r = read_one_line_file(fs, &contents);
1111 if (r < 0)
1112 return r;
8c6db833
LP
1113
1114 sc = strstrip(contents);
e155a0aa 1115 if (isempty(sc)) {
604028de 1116 r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
574d5f2d 1117 if (r < 0)
7027ff61 1118 return r;
b8725df8 1119 } else if (!path_equal(sc, agent))
7027ff61 1120 return -EEXIST;
8c6db833 1121
0da16248 1122 fs = mfree(fs);
7027ff61
LP
1123 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1124 if (r < 0)
1125 return r;
8c6db833 1126
0da16248 1127 contents = mfree(contents);
7027ff61
LP
1128 r = read_one_line_file(fs, &contents);
1129 if (r < 0)
1130 return r;
8c6db833
LP
1131
1132 sc = strstrip(contents);
8c6db833 1133 if (streq(sc, "0")) {
604028de 1134 r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
7027ff61
LP
1135 if (r < 0)
1136 return r;
c6c18be3 1137
7027ff61
LP
1138 return 1;
1139 }
8c6db833 1140
7027ff61
LP
1141 if (!streq(sc, "1"))
1142 return -EIO;
8c6db833 1143
7027ff61 1144 return 0;
8c6db833
LP
1145}
1146
ad929bcc
KS
1147int cg_uninstall_release_agent(const char *controller) {
1148 _cleanup_free_ char *fs = NULL;
415fc41c 1149 int r;
efdb0237 1150
c22800e4 1151 r = cg_unified_controller(controller);
b4cccbc1
LP
1152 if (r < 0)
1153 return r;
1154 if (r > 0) /* Doesn't apply to unified hierarchy */
efdb0237 1155 return -EOPNOTSUPP;
ad929bcc 1156
ac9ef333
LP
1157 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1158 if (r < 0)
1159 return r;
1160
604028de 1161 r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
ac9ef333
LP
1162 if (r < 0)
1163 return r;
1164
0da16248 1165 fs = mfree(fs);
ac9ef333 1166
ad929bcc
KS
1167 r = cg_get_path(controller, NULL, "release_agent", &fs);
1168 if (r < 0)
1169 return r;
1170
604028de 1171 r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER);
ad929bcc
KS
1172 if (r < 0)
1173 return r;
1174
ac9ef333 1175 return 0;
ad929bcc
KS
1176}
1177
6f883237 1178int cg_is_empty(const char *controller, const char *path) {
7027ff61 1179 _cleanup_fclose_ FILE *f = NULL;
efdb0237 1180 pid_t pid;
7027ff61 1181 int r;
8c6db833 1182
8c6db833
LP
1183 assert(path);
1184
b043cd0b 1185 r = cg_enumerate_processes(controller, path, &f);
6f883237 1186 if (r == -ENOENT)
1bcf3fc6 1187 return true;
c3175a7f 1188 if (r < 0)
6f883237 1189 return r;
8c6db833 1190
6f883237 1191 r = cg_read_pid(f, &pid);
c6c18be3
LP
1192 if (r < 0)
1193 return r;
8c6db833 1194
6f883237 1195 return r == 0;
8c6db833
LP
1196}
1197
6f883237 1198int cg_is_empty_recursive(const char *controller, const char *path) {
415fc41c 1199 int r;
8c6db833 1200
8c6db833
LP
1201 assert(path);
1202
6fd66507 1203 /* The root cgroup is always populated */
57ea45e1 1204 if (controller && empty_or_root(path))
efdb0237 1205 return false;
6fd66507 1206
c22800e4 1207 r = cg_unified_controller(controller);
b4cccbc1
LP
1208 if (r < 0)
1209 return r;
1210 if (r > 0) {
ab2c3861 1211 _cleanup_free_ char *t = NULL;
8c6db833 1212
efdb0237 1213 /* On the unified hierarchy we can check empty state
ab2c3861 1214 * via the "populated" attribute of "cgroup.events". */
8c6db833 1215
ab2c3861 1216 r = cg_read_event(controller, path, "populated", &t);
1bcf3fc6
ZJS
1217 if (r == -ENOENT)
1218 return true;
efdb0237
LP
1219 if (r < 0)
1220 return r;
1221
1222 return streq(t, "0");
1223 } else {
1224 _cleanup_closedir_ DIR *d = NULL;
1225 char *fn;
8c6db833 1226
efdb0237 1227 r = cg_is_empty(controller, path);
35d2e7ec 1228 if (r <= 0)
7027ff61 1229 return r;
35d2e7ec 1230
efdb0237
LP
1231 r = cg_enumerate_subgroups(controller, path, &d);
1232 if (r == -ENOENT)
1bcf3fc6 1233 return true;
efdb0237
LP
1234 if (r < 0)
1235 return r;
35d2e7ec 1236
efdb0237
LP
1237 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1238 _cleanup_free_ char *p = NULL;
1239
605405c6 1240 p = strjoin(path, "/", fn);
efdb0237
LP
1241 free(fn);
1242 if (!p)
1243 return -ENOMEM;
1244
1245 r = cg_is_empty_recursive(controller, p);
1246 if (r <= 0)
1247 return r;
1248 }
1249 if (r < 0)
1250 return r;
1251
1252 return true;
1253 }
35d2e7ec
LP
1254}
1255
1256int cg_split_spec(const char *spec, char **controller, char **path) {
35d2e7ec 1257 char *t = NULL, *u = NULL;
efdb0237 1258 const char *e;
35d2e7ec
LP
1259
1260 assert(spec);
35d2e7ec
LP
1261
1262 if (*spec == '/') {
99be45a4 1263 if (!path_is_normalized(spec))
e884315e 1264 return -EINVAL;
35d2e7ec
LP
1265
1266 if (path) {
246aa6dd
LP
1267 t = strdup(spec);
1268 if (!t)
35d2e7ec
LP
1269 return -ENOMEM;
1270
858d36c1 1271 *path = path_simplify(t, false);
8c6db833
LP
1272 }
1273
35d2e7ec
LP
1274 if (controller)
1275 *controller = NULL;
1276
1277 return 0;
8c6db833
LP
1278 }
1279
246aa6dd
LP
1280 e = strchr(spec, ':');
1281 if (!e) {
185a0874 1282 if (!cg_controller_is_valid(spec))
35d2e7ec
LP
1283 return -EINVAL;
1284
1285 if (controller) {
efdb0237 1286 t = strdup(spec);
246aa6dd 1287 if (!t)
35d2e7ec
LP
1288 return -ENOMEM;
1289
1290 *controller = t;
1291 }
1292
1293 if (path)
1294 *path = NULL;
1295
1296 return 0;
8c6db833
LP
1297 }
1298
efdb0237 1299 t = strndup(spec, e-spec);
e884315e
LP
1300 if (!t)
1301 return -ENOMEM;
185a0874 1302 if (!cg_controller_is_valid(t)) {
e884315e 1303 free(t);
35d2e7ec 1304 return -EINVAL;
246aa6dd
LP
1305 }
1306
efdb0237
LP
1307 if (isempty(e+1))
1308 u = NULL;
1309 else {
baa89da4
LP
1310 u = strdup(e+1);
1311 if (!u) {
1312 free(t);
1313 return -ENOMEM;
1314 }
35d2e7ec 1315
99be45a4 1316 if (!path_is_normalized(u) ||
baa89da4
LP
1317 !path_is_absolute(u)) {
1318 free(t);
1319 free(u);
1320 return -EINVAL;
1321 }
1322
858d36c1 1323 path_simplify(u, false);
baa89da4 1324 }
5954c074 1325
35d2e7ec
LP
1326 if (controller)
1327 *controller = t;
e884315e
LP
1328 else
1329 free(t);
35d2e7ec
LP
1330
1331 if (path)
1332 *path = u;
e884315e
LP
1333 else
1334 free(u);
35d2e7ec
LP
1335
1336 return 0;
8c6db833 1337}
c6c18be3 1338
7027ff61 1339int cg_mangle_path(const char *path, char **result) {
78edb35a
LP
1340 _cleanup_free_ char *c = NULL, *p = NULL;
1341 char *t;
35d2e7ec
LP
1342 int r;
1343
1344 assert(path);
1345 assert(result);
1346
73e231ab 1347 /* First, check if it already is a filesystem path */
7027ff61 1348 if (path_startswith(path, "/sys/fs/cgroup")) {
35d2e7ec 1349
b69d29ce
LP
1350 t = strdup(path);
1351 if (!t)
35d2e7ec
LP
1352 return -ENOMEM;
1353
858d36c1 1354 *result = path_simplify(t, false);
35d2e7ec
LP
1355 return 0;
1356 }
1357
73e231ab 1358 /* Otherwise, treat it as cg spec */
b69d29ce
LP
1359 r = cg_split_spec(path, &c, &p);
1360 if (r < 0)
35d2e7ec
LP
1361 return r;
1362
efdb0237 1363 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
35d2e7ec 1364}
1f73f0f1 1365
7027ff61 1366int cg_get_root_path(char **path) {
9444b1f2 1367 char *p, *e;
7027ff61
LP
1368 int r;
1369
1370 assert(path);
1371
9444b1f2 1372 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
7027ff61
LP
1373 if (r < 0)
1374 return r;
1375
efdb0237
LP
1376 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1377 if (!e)
1378 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1379 if (!e)
1380 e = endswith(p, "/system"); /* even more legacy */
9444b1f2 1381 if (e)
7027ff61
LP
1382 *e = 0;
1383
1f73f0f1
LP
1384 *path = p;
1385 return 0;
1386}
b59e2465 1387
751bc6ac
LP
1388int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1389 _cleanup_free_ char *rt = NULL;
1390 char *p;
ba1261bc
LP
1391 int r;
1392
e9174f29 1393 assert(cgroup);
751bc6ac 1394 assert(shifted);
e9174f29
LP
1395
1396 if (!root) {
1397 /* If the root was specified let's use that, otherwise
1398 * let's determine it from PID 1 */
1399
751bc6ac 1400 r = cg_get_root_path(&rt);
e9174f29
LP
1401 if (r < 0)
1402 return r;
1403
751bc6ac 1404 root = rt;
e9174f29 1405 }
ba1261bc 1406
751bc6ac 1407 p = path_startswith(cgroup, root);
efdb0237 1408 if (p && p > cgroup)
751bc6ac
LP
1409 *shifted = p - 1;
1410 else
1411 *shifted = cgroup;
1412
1413 return 0;
1414}
1415
1416int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1417 _cleanup_free_ char *raw = NULL;
1418 const char *c;
1419 int r;
1420
1421 assert(pid >= 0);
1422 assert(cgroup);
1423
1424 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
7027ff61 1425 if (r < 0)
ba1261bc 1426 return r;
ba1261bc 1427
751bc6ac
LP
1428 r = cg_shift_path(raw, root, &c);
1429 if (r < 0)
1430 return r;
ba1261bc 1431
ae2a15bc
LP
1432 if (c == raw)
1433 *cgroup = TAKE_PTR(raw);
1434 else {
751bc6ac 1435 char *n;
ba1261bc 1436
751bc6ac
LP
1437 n = strdup(c);
1438 if (!n)
ba1261bc 1439 return -ENOMEM;
ba1261bc 1440
751bc6ac
LP
1441 *cgroup = n;
1442 }
ba1261bc
LP
1443
1444 return 0;
1445}
1446
9ed794a3 1447int cg_path_decode_unit(const char *cgroup, char **unit) {
8b0849e9
LP
1448 char *c, *s;
1449 size_t n;
ef1673d1
MT
1450
1451 assert(cgroup);
6c03089c 1452 assert(unit);
ef1673d1 1453
8b0849e9
LP
1454 n = strcspn(cgroup, "/");
1455 if (n < 3)
1456 return -ENXIO;
1457
1458 c = strndupa(cgroup, n);
ae018d9b 1459 c = cg_unescape(c);
ef1673d1 1460
7410616c 1461 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
cfeaa44a 1462 return -ENXIO;
ef1673d1 1463
d7bd3de0 1464 s = strdup(c);
6c03089c
LP
1465 if (!s)
1466 return -ENOMEM;
1467
1468 *unit = s;
ef1673d1
MT
1469 return 0;
1470}
1471
8b0849e9
LP
1472static bool valid_slice_name(const char *p, size_t n) {
1473
1474 if (!p)
1475 return false;
1476
fbd0b64f 1477 if (n < STRLEN("x.slice"))
8b0849e9
LP
1478 return false;
1479
1480 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1481 char buf[n+1], *c;
1482
1483 memcpy(buf, p, n);
1484 buf[n] = 0;
1485
1486 c = cg_unescape(buf);
1487
7410616c 1488 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
8b0849e9
LP
1489 }
1490
1491 return false;
1492}
1493
/* Skips over all leading slice components of 'p' and returns a pointer to the start of the first
 * component that is not a valid slice name (possibly the terminating NUL). */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t len;

                /* Jump over separators, then measure the component that follows */
                component = p + strspn(p, "/");
                len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                p = component + len;
        }
}
1511
8b0849e9 1512int cg_path_get_unit(const char *path, char **ret) {
6c03089c 1513 const char *e;
8b0849e9
LP
1514 char *unit;
1515 int r;
6c03089c
LP
1516
1517 assert(path);
8b0849e9 1518 assert(ret);
6c03089c 1519
9444b1f2 1520 e = skip_slices(path);
6c03089c 1521
8b0849e9
LP
1522 r = cg_path_decode_unit(e, &unit);
1523 if (r < 0)
1524 return r;
1525
1526 /* We skipped over the slices, don't accept any now */
1527 if (endswith(unit, ".slice")) {
1528 free(unit);
1529 return -ENXIO;
1530 }
1531
1532 *ret = unit;
1533 return 0;
6c03089c
LP
1534}
1535
1536int cg_pid_get_unit(pid_t pid, char **unit) {
7fd1b19b 1537 _cleanup_free_ char *cgroup = NULL;
ba1261bc 1538 int r;
ba1261bc 1539
ef1673d1
MT
1540 assert(unit);
1541
7027ff61 1542 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
ef1673d1
MT
1543 if (r < 0)
1544 return r;
1545
6c03089c
LP
1546 return cg_path_get_unit(cgroup, unit);
1547}
ef1673d1 1548
d4fffc4b
ZJS
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to whatever follows the "session-<id>.scope" component (with any slashes after it
 * skipped too), or NULL if 'p' is empty or its first component is not such a scope carrying a valid
 * session ID.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the ID between the 8 character prefix and the 6 character suffix */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (!session_id_valid(buf))
                        return NULL; /* this function returns a pointer, hence NULL rather than false */

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1585
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the "user@<uid>.service" component (slashes after it skipped), or
 * NULL if 'p' is empty or does not begin with such a component carrying a parsable UID.
 */
static const char *skip_user_manager(const char *p) {
        size_t len, uid_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < STRLEN("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;

        /* What remains between prefix and suffix must be the UID */
        uid_len = len - 5 - 8;

        char uid_str[uid_len + 1];

        memcpy(uid_str, p + 5, uid_len);
        uid_str[uid_len] = 0;

        /* Note that user manager services never need unescaping,
         * since they cannot conflict with the kernel's own
         * names, hence we don't need to call cg_unescape()
         * here. */

        if (parse_uid(uid_str, NULL) < 0)
                return NULL;

        p += len;
        return p + strspn(p, "/");
}
1623
/* Skips the part of a cgroup path that leads to a user's own units: any leading slices, followed by either
 * a user manager service or, failing that, a session scope. Returns a pointer to what follows, or NULL if
 * neither is present. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Prefer the user manager; only if it is not in the path fall back to the user session */
        return skip_user_manager(after_slices) ?: skip_session(after_slices);
}
32081481 1640
329ac4bc
LP
/* Extracts the user unit name from a cgroup path: the user prefix (slices plus user manager service or
 * session scope) is skipped and the remainder is parsed by cg_path_get_unit().
 *
 * Returns 0 on success, -ENXIO if the path carries no user prefix, or the error of cg_path_get_unit(). */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);
        if (!rest)
                return -ENXIO;

        /* Past the prefix the layout is the same as for system units, so reuse that parser */
        return cg_path_get_unit(rest, ret);
}
ba1261bc 1656
ef1673d1 1657int cg_pid_get_user_unit(pid_t pid, char **unit) {
7fd1b19b 1658 _cleanup_free_ char *cgroup = NULL;
6c03089c
LP
1659 int r;
1660
1661 assert(unit);
1662
7027ff61 1663 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
6c03089c
LP
1664 if (r < 0)
1665 return r;
1666
1667 return cg_path_get_user_unit(cgroup, unit);
ba1261bc 1668}
e884315e 1669
7027ff61 1670int cg_path_get_machine_name(const char *path, char **machine) {
efdb0237
LP
1671 _cleanup_free_ char *u = NULL;
1672 const char *sl;
89f7c846 1673 int r;
374ec6ab 1674
89f7c846
LP
1675 r = cg_path_get_unit(path, &u);
1676 if (r < 0)
1677 return r;
7027ff61 1678
efdb0237 1679 sl = strjoina("/run/systemd/machines/unit:", u);
89f7c846 1680 return readlink_malloc(sl, machine);
7027ff61
LP
1681}
1682
1683int cg_pid_get_machine_name(pid_t pid, char **machine) {
7fd1b19b 1684 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1685 int r;
1686
1687 assert(machine);
1688
1689 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1690 if (r < 0)
1691 return r;
1692
1693 return cg_path_get_machine_name(cgroup, machine);
1694}
1695
1696int cg_path_get_session(const char *path, char **session) {
8b0849e9
LP
1697 _cleanup_free_ char *unit = NULL;
1698 char *start, *end;
1699 int r;
7027ff61
LP
1700
1701 assert(path);
7027ff61 1702
8b0849e9
LP
1703 r = cg_path_get_unit(path, &unit);
1704 if (r < 0)
1705 return r;
7027ff61 1706
8b0849e9
LP
1707 start = startswith(unit, "session-");
1708 if (!start)
cfeaa44a 1709 return -ENXIO;
8b0849e9
LP
1710 end = endswith(start, ".scope");
1711 if (!end)
cfeaa44a 1712 return -ENXIO;
8b0849e9
LP
1713
1714 *end = 0;
1715 if (!session_id_valid(start))
cfeaa44a 1716 return -ENXIO;
374ec6ab 1717
af08d2f9 1718 if (session) {
8b0849e9 1719 char *rr;
af08d2f9 1720
8b0849e9
LP
1721 rr = strdup(start);
1722 if (!rr)
af08d2f9
LP
1723 return -ENOMEM;
1724
8b0849e9 1725 *session = rr;
af08d2f9 1726 }
7027ff61 1727
7027ff61
LP
1728 return 0;
1729}
1730
1731int cg_pid_get_session(pid_t pid, char **session) {
7fd1b19b 1732 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1733 int r;
1734
7027ff61
LP
1735 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1736 if (r < 0)
1737 return r;
1738
1739 return cg_path_get_session(cgroup, session);
1740}
1741
ae018d9b 1742int cg_path_get_owner_uid(const char *path, uid_t *uid) {
374ec6ab 1743 _cleanup_free_ char *slice = NULL;
8b0849e9 1744 char *start, *end;
374ec6ab 1745 int r;
ae018d9b
LP
1746
1747 assert(path);
ae018d9b 1748
374ec6ab
LP
1749 r = cg_path_get_slice(path, &slice);
1750 if (r < 0)
1751 return r;
ae018d9b 1752
674eb685
LP
1753 start = startswith(slice, "user-");
1754 if (!start)
cfeaa44a 1755 return -ENXIO;
8b0849e9 1756 end = endswith(start, ".slice");
674eb685 1757 if (!end)
cfeaa44a 1758 return -ENXIO;
ae018d9b 1759
8b0849e9
LP
1760 *end = 0;
1761 if (parse_uid(start, uid) < 0)
cfeaa44a 1762 return -ENXIO;
674eb685 1763
674eb685 1764 return 0;
ae018d9b
LP
1765}
1766
1767int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1768 _cleanup_free_ char *cgroup = NULL;
1769 int r;
1770
ae018d9b
LP
1771 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1772 if (r < 0)
1773 return r;
1774
1775 return cg_path_get_owner_uid(cgroup, uid);
1776}
1777
1021b21b
LP
1778int cg_path_get_slice(const char *p, char **slice) {
1779 const char *e = NULL;
1021b21b
LP
1780
1781 assert(p);
1782 assert(slice);
1783
329ac4bc
LP
1784 /* Finds the right-most slice unit from the beginning, but
1785 * stops before we come to the first non-slice unit. */
1786
1021b21b
LP
1787 for (;;) {
1788 size_t n;
1789
1790 p += strspn(p, "/");
1791
1792 n = strcspn(p, "/");
8b0849e9 1793 if (!valid_slice_name(p, n)) {
1021b21b 1794
8b0849e9
LP
1795 if (!e) {
1796 char *s;
1021b21b 1797
e5d855d3 1798 s = strdup(SPECIAL_ROOT_SLICE);
8b0849e9
LP
1799 if (!s)
1800 return -ENOMEM;
1021b21b 1801
8b0849e9
LP
1802 *slice = s;
1803 return 0;
1804 }
1805
1806 return cg_path_decode_unit(e, slice);
1021b21b
LP
1807 }
1808
1809 e = p;
1021b21b
LP
1810 p += n;
1811 }
1812}
1813
1814int cg_pid_get_slice(pid_t pid, char **slice) {
1815 _cleanup_free_ char *cgroup = NULL;
1816 int r;
1817
1818 assert(slice);
1819
1820 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1821 if (r < 0)
1822 return r;
1823
1824 return cg_path_get_slice(cgroup, slice);
1825}
1826
329ac4bc
LP
/* Like cg_path_get_slice(), but for the slice below the user prefix (slices plus user manager service or
 * session scope) of the path. Returns -ENXIO if the path has no such prefix. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);
        if (!rest)
                return -ENXIO;

        /* Past the prefix this looks just like a system slice path, so reuse that parser */
        return cg_path_get_slice(rest, slice);
}
1840
1841int cg_pid_get_user_slice(pid_t pid, char **slice) {
1842 _cleanup_free_ char *cgroup = NULL;
1843 int r;
1844
1845 assert(slice);
1846
1847 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1848 if (r < 0)
1849 return r;
1850
1851 return cg_path_get_user_slice(cgroup, slice);
1852}
1853
ae018d9b
LP
1854char *cg_escape(const char *p) {
1855 bool need_prefix = false;
1856
1857 /* This implements very minimal escaping for names to be used
1858 * as file names in the cgroup tree: any name which might
1859 * conflict with a kernel name or is prefixed with '_' is
1860 * prefixed with a '_'. That way, when reading cgroup names it
1861 * is sufficient to remove a single prefixing underscore if
1862 * there is one. */
1863
1864 /* The return value of this function (unlike cg_unescape())
1865 * needs free()! */
1866
4c701096 1867 if (IN_SET(p[0], 0, '_', '.') ||
0cbd293e 1868 STR_IN_SET(p, "notify_on_release", "release_agent", "tasks") ||
efdb0237 1869 startswith(p, "cgroup."))
ae018d9b
LP
1870 need_prefix = true;
1871 else {
1872 const char *dot;
1873
1874 dot = strrchr(p, '.');
1875 if (dot) {
efdb0237
LP
1876 CGroupController c;
1877 size_t l = dot - p;
ae018d9b 1878
efdb0237
LP
1879 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1880 const char *n;
1881
1882 n = cgroup_controller_to_string(c);
ae018d9b 1883
efdb0237
LP
1884 if (l != strlen(n))
1885 continue;
ae018d9b 1886
efdb0237
LP
1887 if (memcmp(p, n, l) != 0)
1888 continue;
1889
1890 need_prefix = true;
1891 break;
ae018d9b
LP
1892 }
1893 }
1894 }
1895
1896 if (need_prefix)
1897 return strappend("_", p);
efdb0237
LP
1898
1899 return strdup(p);
ae018d9b
LP
1900}
1901
/* Reverses cg_escape(): skips one leading underscore, if there is one.
 *
 * Unlike cg_escape() this does not allocate anything: the returned pointer points into 'p' and must not
 * be freed. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
78edb35a
LP
1913
1914#define CONTROLLER_VALID \
4b549144 1915 DIGITS LETTERS \
78edb35a
LP
1916 "_"
1917
185a0874 1918bool cg_controller_is_valid(const char *p) {
78edb35a
LP
1919 const char *t, *s;
1920
1921 if (!p)
1922 return false;
1923
b6629c4b
TH
1924 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1925 return true;
1926
185a0874
DJL
1927 s = startswith(p, "name=");
1928 if (s)
1929 p = s;
78edb35a 1930
4c701096 1931 if (IN_SET(*p, 0, '_'))
78edb35a
LP
1932 return false;
1933
1934 for (t = p; *t; t++)
1935 if (!strchr(CONTROLLER_VALID, *t))
1936 return false;
1937
1938 if (t - p > FILENAME_MAX)
1939 return false;
1940
1941 return true;
1942}
a016b922
LP
1943
1944int cg_slice_to_path(const char *unit, char **ret) {
1945 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1946 const char *dash;
7410616c 1947 int r;
a016b922
LP
1948
1949 assert(unit);
1950 assert(ret);
1951
e5d855d3 1952 if (streq(unit, SPECIAL_ROOT_SLICE)) {
c96cc582
LP
1953 char *x;
1954
1955 x = strdup("");
1956 if (!x)
1957 return -ENOMEM;
1958 *ret = x;
1959 return 0;
1960 }
1961
7410616c 1962 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
a016b922
LP
1963 return -EINVAL;
1964
1965 if (!endswith(unit, ".slice"))
1966 return -EINVAL;
1967
7410616c
LP
1968 r = unit_name_to_prefix(unit, &p);
1969 if (r < 0)
1970 return r;
a016b922
LP
1971
1972 dash = strchr(p, '-');
e66e5b61
LP
1973
1974 /* Don't allow initial dashes */
1975 if (dash == p)
1976 return -EINVAL;
1977
a016b922
LP
1978 while (dash) {
1979 _cleanup_free_ char *escaped = NULL;
1980 char n[dash - p + sizeof(".slice")];
1981
989290db 1982#if HAS_FEATURE_MEMORY_SANITIZER
1c56d501 1983 /* msan doesn't instrument stpncpy, so it thinks
5238e957 1984 * n is later used uninitialized:
1c56d501
ZJS
1985 * https://github.com/google/sanitizers/issues/926
1986 */
1987 zero(n);
1988#endif
1989
e66e5b61 1990 /* Don't allow trailing or double dashes */
4c701096 1991 if (IN_SET(dash[1], 0, '-'))
c96cc582 1992 return -EINVAL;
a016b922 1993
c96cc582 1994 strcpy(stpncpy(n, p, dash - p), ".slice");
7410616c 1995 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
a016b922
LP
1996 return -EINVAL;
1997
1998 escaped = cg_escape(n);
1999 if (!escaped)
2000 return -ENOMEM;
2001
2002 if (!strextend(&s, escaped, "/", NULL))
2003 return -ENOMEM;
2004
2005 dash = strchr(dash+1, '-');
2006 }
2007
2008 e = cg_escape(unit);
2009 if (!e)
2010 return -ENOMEM;
2011
2012 if (!strextend(&s, e, NULL))
2013 return -ENOMEM;
2014
ae2a15bc 2015 *ret = TAKE_PTR(s);
a016b922
LP
2016
2017 return 0;
2018}
4ad49000
LP
2019
2020int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2021 _cleanup_free_ char *p = NULL;
2022 int r;
2023
2024 r = cg_get_path(controller, path, attribute, &p);
2025 if (r < 0)
2026 return r;
2027
604028de 2028 return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
4ad49000
LP
2029}
2030
934277fe
LP
2031int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2032 _cleanup_free_ char *p = NULL;
2033 int r;
2034
2035 r = cg_get_path(controller, path, attribute, &p);
2036 if (r < 0)
2037 return r;
2038
2039 return read_one_line_file(p, ret);
2040}
2041
b734a4ff
LP
2042int cg_get_keyed_attribute(
2043 const char *controller,
2044 const char *path,
2045 const char *attribute,
2046 char **keys,
2047 char **ret_values) {
66ebf6c0 2048
b734a4ff 2049 _cleanup_free_ char *filename = NULL, *contents = NULL;
b734a4ff 2050 const char *p;
9177fa9f 2051 size_t n, i, n_done = 0;
b734a4ff
LP
2052 char **v;
2053 int r;
2054
4e1dfa45 2055 /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
b734a4ff
LP
2056 * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
2057 * entries as 'keys'. On success each entry will be set to the value of the matching key.
2058 *
2059 * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
66ebf6c0
TH
2060
2061 r = cg_get_path(controller, path, attribute, &filename);
2062 if (r < 0)
2063 return r;
2064
b734a4ff 2065 r = read_full_file(filename, &contents, NULL);
66ebf6c0
TH
2066 if (r < 0)
2067 return r;
2068
b734a4ff
LP
2069 n = strv_length(keys);
2070 if (n == 0) /* No keys to retrieve? That's easy, we are done then */
2071 return 0;
66ebf6c0 2072
b734a4ff
LP
2073 /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
2074 v = newa0(char*, n);
66ebf6c0 2075
b734a4ff
LP
2076 for (p = contents; *p;) {
2077 const char *w = NULL;
b734a4ff 2078
9177fa9f
ZJS
2079 for (i = 0; i < n; i++)
2080 if (!v[i]) {
b734a4ff
LP
2081 w = first_word(p, keys[i]);
2082 if (w)
2083 break;
66ebf6c0 2084 }
66ebf6c0 2085
b734a4ff 2086 if (w) {
b734a4ff
LP
2087 size_t l;
2088
2089 l = strcspn(w, NEWLINE);
9177fa9f
ZJS
2090 v[i] = strndup(w, l);
2091 if (!v[i]) {
b734a4ff
LP
2092 r = -ENOMEM;
2093 goto fail;
66ebf6c0 2094 }
b734a4ff 2095
b734a4ff 2096 n_done++;
b734a4ff
LP
2097 if (n_done >= n)
2098 goto done;
2099
2100 p = w + l;
9177fa9f 2101 } else
b734a4ff 2102 p += strcspn(p, NEWLINE);
b734a4ff
LP
2103
2104 p += strspn(p, NEWLINE);
66ebf6c0
TH
2105 }
2106
b734a4ff
LP
2107 r = -ENXIO;
2108
2109fail:
2110 for (i = 0; i < n; i++)
2111 free(v[i]);
2112
2113 return r;
2114
2115done:
2116 memcpy(ret_values, v, sizeof(char*) * n);
66ebf6c0 2117 return 0;
b734a4ff 2118
66ebf6c0
TH
2119}
2120
efdb0237
LP
2121int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2122 CGroupController c;
e353faa0 2123 CGroupMask done;
65be7e06 2124 bool created;
415fc41c 2125 int r;
4ad49000
LP
2126
2127 /* This one will create a cgroup in our private tree, but also
2128 * duplicate it in the trees specified in mask, and remove it
65be7e06
ZJS
2129 * in all others.
2130 *
2131 * Returns 0 if the group already existed in the systemd hierarchy,
2132 * 1 on success, negative otherwise.
2133 */
4ad49000
LP
2134
2135 /* First create the cgroup in our own hierarchy. */
2136 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2137 if (r < 0)
2138 return r;
490c5a37 2139 created = r;
4ad49000 2140
efdb0237 2141 /* If we are in the unified hierarchy, we are done now */
b4cccbc1
LP
2142 r = cg_all_unified();
2143 if (r < 0)
2144 return r;
2145 if (r > 0)
65be7e06 2146 return created;
efdb0237 2147
e353faa0
LP
2148 supported &= CGROUP_MASK_V1;
2149 mask = CGROUP_MASK_EXTEND_JOINED(mask);
2150 done = 0;
2151
efdb0237
LP
2152 /* Otherwise, do the same in the other hierarchies */
2153 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2154 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2155 const char *n;
2156
e353faa0 2157 if (!FLAGS_SET(supported, bit))
ab275f23
LP
2158 continue;
2159
e353faa0
LP
2160 if (FLAGS_SET(done, bit))
2161 continue;
efdb0237 2162
e353faa0 2163 n = cgroup_controller_to_string(c);
f99850a0 2164 if (FLAGS_SET(mask, bit))
efdb0237 2165 (void) cg_create(n, path);
e353faa0 2166 else
efdb0237 2167 (void) cg_trim(n, path, true);
e353faa0
LP
2168
2169 done |= CGROUP_MASK_EXTEND_JOINED(bit);
4ad49000
LP
2170 }
2171
65be7e06 2172 return created;
4ad49000
LP
2173}
2174
efdb0237
LP
2175int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2176 CGroupController c;
e353faa0 2177 CGroupMask done;
415fc41c 2178 int r;
4ad49000
LP
2179
2180 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
13b84ec7
LP
2181 if (r < 0)
2182 return r;
4ad49000 2183
b4cccbc1
LP
2184 r = cg_all_unified();
2185 if (r < 0)
2186 return r;
2187 if (r > 0)
efdb0237 2188 return 0;
7b3fd631 2189
e353faa0
LP
2190 supported &= CGROUP_MASK_V1;
2191 done = 0;
2192
efdb0237
LP
2193 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2194 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2195 const char *p = NULL;
7b3fd631 2196
e353faa0 2197 if (!FLAGS_SET(supported, bit))
ab275f23
LP
2198 continue;
2199
e353faa0 2200 if (FLAGS_SET(done, bit))
efdb0237 2201 continue;
7b3fd631 2202
efdb0237
LP
2203 if (path_callback)
2204 p = path_callback(bit, userdata);
efdb0237
LP
2205 if (!p)
2206 p = path;
4ad49000 2207
efdb0237 2208 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
e353faa0 2209 done |= CGROUP_MASK_EXTEND_JOINED(bit);
4ad49000
LP
2210 }
2211
13b84ec7 2212 return 0;
4ad49000
LP
2213}
2214
efdb0237 2215int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
6c12b52e
LP
2216 Iterator i;
2217 void *pidp;
2218 int r = 0;
2219
2220 SET_FOREACH(pidp, pids, i) {
fea72cc0 2221 pid_t pid = PTR_TO_PID(pidp);
13b84ec7 2222 int q;
6c12b52e 2223
7b3fd631 2224 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
efdb0237 2225 if (q < 0 && r >= 0)
13b84ec7 2226 r = q;
6c12b52e
LP
2227 }
2228
2229 return r;
2230}
2231
efdb0237 2232int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
b3c5bad3 2233 CGroupController c;
e353faa0 2234 CGroupMask done;
b4cccbc1 2235 int r = 0, q;
4ad49000 2236
13b84ec7 2237 if (!path_equal(from, to)) {
1d98fef1 2238 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
13b84ec7
LP
2239 if (r < 0)
2240 return r;
2241 }
4ad49000 2242
b4cccbc1
LP
2243 q = cg_all_unified();
2244 if (q < 0)
2245 return q;
2246 if (q > 0)
efdb0237 2247 return r;
03b90d4b 2248
e353faa0
LP
2249 supported &= CGROUP_MASK_V1;
2250 done = 0;
2251
efdb0237
LP
2252 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2253 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2254 const char *p = NULL;
03b90d4b 2255
e353faa0 2256 if (!FLAGS_SET(supported, bit))
ab275f23
LP
2257 continue;
2258
e353faa0 2259 if (FLAGS_SET(done, bit))
efdb0237 2260 continue;
03b90d4b 2261
efdb0237
LP
2262 if (to_callback)
2263 p = to_callback(bit, userdata);
efdb0237
LP
2264 if (!p)
2265 p = to;
2266
1d98fef1 2267 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
e353faa0 2268 done |= CGROUP_MASK_EXTEND_JOINED(bit);
4ad49000
LP
2269 }
2270
e353faa0 2271 return r;
4ad49000
LP
2272}
2273
efdb0237
LP
2274int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2275 CGroupController c;
e353faa0 2276 CGroupMask done;
b4cccbc1 2277 int r, q;
4ad49000
LP
2278
2279 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2280 if (r < 0)
2281 return r;
2282
b4cccbc1
LP
2283 q = cg_all_unified();
2284 if (q < 0)
2285 return q;
2286 if (q > 0)
efdb0237
LP
2287 return r;
2288
e353faa0
LP
2289 supported &= CGROUP_MASK_V1;
2290 done = 0;
2291
efdb0237
LP
2292 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2293 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2294
e353faa0 2295 if (!FLAGS_SET(supported, bit))
ab275f23
LP
2296 continue;
2297
e353faa0 2298 if (FLAGS_SET(done, bit))
efdb0237 2299 continue;
4ad49000 2300
efdb0237 2301 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
e353faa0 2302 done |= CGROUP_MASK_EXTEND_JOINED(bit);
4ad49000
LP
2303 }
2304
e353faa0 2305 return r;
4ad49000
LP
2306}
2307
aae7e17f 2308int cg_mask_to_string(CGroupMask mask, char **ret) {
ec635a2d
LP
2309 _cleanup_free_ char *s = NULL;
2310 size_t n = 0, allocated = 0;
2311 bool space = false;
aae7e17f 2312 CGroupController c;
aae7e17f
FB
2313
2314 assert(ret);
2315
2316 if (mask == 0) {
2317 *ret = NULL;
2318 return 0;
2319 }
2320
2321 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
ec635a2d
LP
2322 const char *k;
2323 size_t l;
aae7e17f 2324
f99850a0 2325 if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
aae7e17f
FB
2326 continue;
2327
ec635a2d
LP
2328 k = cgroup_controller_to_string(c);
2329 l = strlen(k);
2330
2331 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
2332 return -ENOMEM;
2333
2334 if (space)
2335 s[n] = ' ';
2336 memcpy(s + n + space, k, l);
2337 n += space + l;
2338
2339 space = true;
aae7e17f
FB
2340 }
2341
ec635a2d 2342 assert(s);
aae7e17f 2343
ec635a2d 2344 s[n] = 0;
ae2a15bc 2345 *ret = TAKE_PTR(s);
ec635a2d 2346
aae7e17f
FB
2347 return 0;
2348}
2349
38a90d45
LP
2350int cg_mask_from_string(const char *value, CGroupMask *ret) {
2351 CGroupMask m = 0;
2352
2353 assert(ret);
aae7e17f
FB
2354 assert(value);
2355
2356 for (;;) {
2357 _cleanup_free_ char *n = NULL;
2358 CGroupController v;
2359 int r;
2360
2361 r = extract_first_word(&value, &n, NULL, 0);
2362 if (r < 0)
2363 return r;
2364 if (r == 0)
2365 break;
2366
2367 v = cgroup_controller_from_string(n);
2368 if (v < 0)
2369 continue;
2370
38a90d45 2371 m |= CGROUP_CONTROLLER_TO_MASK(v);
aae7e17f 2372 }
38a90d45
LP
2373
2374 *ret = m;
aae7e17f
FB
2375 return 0;
2376}
2377
efdb0237 2378int cg_mask_supported(CGroupMask *ret) {
38a90d45 2379 CGroupMask mask;
415fc41c 2380 int r;
efdb0237 2381
67558d15
LP
2382 /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense of and that
2383 * are actually accessible. Only covers real controllers, i.e. not the CGROUP_CONTROLLER_BPF_xyz
2384 * pseudo-controllers. */
4ad49000 2385
b4cccbc1
LP
2386 r = cg_all_unified();
2387 if (r < 0)
2388 return r;
2389 if (r > 0) {
5f4c5fef 2390 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
efdb0237
LP
2391
2392 /* In the unified hierarchy we can read the supported
2393 * and accessible controllers from a the top-level
2394 * cgroup attribute */
2395
5f4c5fef
LP
2396 r = cg_get_root_path(&root);
2397 if (r < 0)
2398 return r;
2399
2400 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2401 if (r < 0)
2402 return r;
2403
2404 r = read_one_line_file(path, &controllers);
efdb0237
LP
2405 if (r < 0)
2406 return r;
4ad49000 2407
aae7e17f
FB
2408 r = cg_mask_from_string(controllers, &mask);
2409 if (r < 0)
2410 return r;
efdb0237 2411
03afd780 2412 /* Currently, we support the cpu, memory, io and pids controller in the unified hierarchy, mask
03a7b521 2413 * everything else off. */
03afd780 2414 mask &= CGROUP_MASK_V2;
efdb0237
LP
2415
2416 } else {
2417 CGroupController c;
2418
03afd780 2419 /* In the legacy hierarchy, we check which hierarchies are mounted. */
efdb0237 2420
38a90d45 2421 mask = 0;
efdb0237 2422 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
03afd780 2423 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
efdb0237
LP
2424 const char *n;
2425
03afd780
LP
2426 if (!FLAGS_SET(CGROUP_MASK_V1, bit))
2427 continue;
2428
efdb0237
LP
2429 n = cgroup_controller_to_string(c);
2430 if (controller_is_accessible(n) >= 0)
03afd780 2431 mask |= bit;
efdb0237 2432 }
4ad49000
LP
2433 }
2434
efdb0237
LP
2435 *ret = mask;
2436 return 0;
4ad49000 2437}
b12afc8c 2438
6925a0de
LP
2439int cg_kernel_controllers(Set **ret) {
2440 _cleanup_set_free_free_ Set *controllers = NULL;
b12afc8c 2441 _cleanup_fclose_ FILE *f = NULL;
b12afc8c
LP
2442 int r;
2443
6925a0de 2444 assert(ret);
b12afc8c 2445
f09e86bc
LS
2446 /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
2447 * and controllers that aren't currently accessible (because not mounted). This does not include "name="
2448 * pseudo-controllers. */
e155a0aa 2449
6925a0de
LP
2450 controllers = set_new(&string_hash_ops);
2451 if (!controllers)
2452 return -ENOMEM;
2453
fdeea3f4
ZJS
2454 r = fopen_unlocked("/proc/cgroups", "re", &f);
2455 if (r == -ENOENT) {
2456 *ret = NULL;
2457 return 0;
b12afc8c 2458 }
fdeea3f4
ZJS
2459 if (r < 0)
2460 return r;
35bbbf85 2461
b12afc8c 2462 /* Ignore the header line */
2351e44d 2463 (void) read_line(f, (size_t) -1, NULL);
b12afc8c
LP
2464
2465 for (;;) {
2466 char *controller;
2467 int enabled = 0;
2468
2469 errno = 0;
2470 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2471
2472 if (feof(f))
2473 break;
2474
b3267152 2475 if (ferror(f) && errno > 0)
b12afc8c
LP
2476 return -errno;
2477
2478 return -EBADMSG;
2479 }
2480
2481 if (!enabled) {
2482 free(controller);
2483 continue;
2484 }
2485
efdb0237 2486 if (!cg_controller_is_valid(controller)) {
b12afc8c
LP
2487 free(controller);
2488 return -EBADMSG;
2489 }
2490
2491 r = set_consume(controllers, controller);
2492 if (r < 0)
2493 return r;
2494 }
2495
1cc6c93a 2496 *ret = TAKE_PTR(controllers);
6925a0de 2497
b12afc8c
LP
2498 return 0;
2499}
efdb0237 2500
5da38d07
TH
2501static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2502
4e1dfa45 2503/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on /sys/fs/cgroup/systemd. This
c22800e4
LP
2504 * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
2505 * /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mountnbs v2 on /sys/fs/cgroup/unified and maintains
2506 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
f08e9287 2507 *
c22800e4
LP
2508 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep cgroup v2
2509 * process management but disable the compat dual layout, we return %true on
2510 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
f08e9287
TH
2511 */
2512static thread_local bool unified_systemd_v232;
2513
1fcca10e 2514static int cg_unified_update(void) {
efdb0237 2515
efdb0237
LP
2516 struct statfs fs;
2517
2518 /* Checks if we support the unified hierarchy. Returns an
2519 * error when the cgroup hierarchies aren't mounted yet or we
2520 * have any other trouble determining if the unified hierarchy
2521 * is supported. */
2522
5da38d07
TH
2523 if (unified_cache >= CGROUP_UNIFIED_NONE)
2524 return 0;
efdb0237
LP
2525
2526 if (statfs("/sys/fs/cgroup/", &fs) < 0)
c028bed1 2527 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
efdb0237 2528
9aa21133
ZJS
2529 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2530 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
5da38d07 2531 unified_cache = CGROUP_UNIFIED_ALL;
9aa21133 2532 } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2977724b 2533 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
f08e9287 2534 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
9aa21133 2535 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2977724b 2536 unified_cache = CGROUP_UNIFIED_SYSTEMD;
f08e9287 2537 unified_systemd_v232 = false;
f08e9287 2538 } else {
2977724b 2539 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
9aa21133 2540 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
5535d8f7
EV
2541
2542 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2543 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2544 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2545 unified_systemd_v232 = true;
2546 } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2547 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2548 unified_cache = CGROUP_UNIFIED_NONE;
2549 } else {
2550 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
9aa21133 2551 (unsigned long long) fs.f_type);
5535d8f7 2552 unified_cache = CGROUP_UNIFIED_NONE;
9aa21133 2553 }
2977724b 2554 }
baaa35ad
ZJS
2555 } else
2556 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
2557 "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2558 (unsigned long long)fs.f_type);
efdb0237 2559
5da38d07
TH
2560 return 0;
2561}
2562
c22800e4 2563int cg_unified_controller(const char *controller) {
b4cccbc1 2564 int r;
5da38d07 2565
1fcca10e 2566 r = cg_unified_update();
b4cccbc1
LP
2567 if (r < 0)
2568 return r;
5da38d07 2569
fc9ae717
LP
2570 if (unified_cache == CGROUP_UNIFIED_NONE)
2571 return false;
2572
2573 if (unified_cache >= CGROUP_UNIFIED_ALL)
2574 return true;
2575
2576 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
5da38d07
TH
2577}
2578
b4cccbc1 2579int cg_all_unified(void) {
4bb652ac
LP
2580 int r;
2581
2582 r = cg_unified_update();
2583 if (r < 0)
2584 return r;
2585
2586 return unified_cache >= CGROUP_UNIFIED_ALL;
efdb0237
LP
2587}
2588
b4cccbc1
LP
2589int cg_hybrid_unified(void) {
2590 int r;
2977724b 2591
1fcca10e 2592 r = cg_unified_update();
b4cccbc1
LP
2593 if (r < 0)
2594 return r;
2977724b 2595
f08e9287 2596 return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2977724b
TH
2597}
2598
415fc41c 2599int cg_unified_flush(void) {
5da38d07 2600 unified_cache = CGROUP_UNIFIED_UNKNOWN;
415fc41c 2601
1fcca10e 2602 return cg_unified_update();
efdb0237
LP
2603}
2604
27adcc97
LP
2605int cg_enable_everywhere(
2606 CGroupMask supported,
2607 CGroupMask mask,
2608 const char *p,
2609 CGroupMask *ret_result_mask) {
2610
77fa610b 2611 _cleanup_fclose_ FILE *f = NULL;
efdb0237
LP
2612 _cleanup_free_ char *fs = NULL;
2613 CGroupController c;
27adcc97 2614 CGroupMask ret = 0;
415fc41c 2615 int r;
efdb0237
LP
2616
2617 assert(p);
2618
27adcc97
LP
2619 if (supported == 0) {
2620 if (ret_result_mask)
2621 *ret_result_mask = 0;
efdb0237 2622 return 0;
27adcc97 2623 }
efdb0237 2624
b4cccbc1
LP
2625 r = cg_all_unified();
2626 if (r < 0)
2627 return r;
27adcc97 2628 if (r == 0) {
5238e957 2629 /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
27adcc97
LP
2630 * complete success right away. (If you wonder why we return the full mask here, rather than zero: the
2631 * caller tends to use the returned mask later on to compare if all controllers where properly joined,
2632 * and if not requeues realization. This use is the primary purpose of the return value, hence let's
2633 * minimize surprises here and reduce triggers for re-realization by always saying we fully
2634 * succeeded.) */
2635 if (ret_result_mask)
2636 *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
2637 * CGROUP_MASK_V2: The 'supported' mask
2638 * might contain pure-V1 or BPF
2639 * controllers, and we never want to
2640 * claim that we could enable those with
2641 * cgroup.subtree_control */
efdb0237 2642 return 0;
27adcc97 2643 }
efdb0237
LP
2644
2645 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2646 if (r < 0)
2647 return r;
2648
2649 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2650 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2651 const char *n;
2652
ab275f23
LP
2653 if (!FLAGS_SET(CGROUP_MASK_V2, bit))
2654 continue;
2655
f99850a0 2656 if (!FLAGS_SET(supported, bit))
efdb0237
LP
2657 continue;
2658
2659 n = cgroup_controller_to_string(c);
2660 {
2661 char s[1 + strlen(n) + 1];
2662
f99850a0 2663 s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
efdb0237
LP
2664 strcpy(s + 1, n);
2665
77fa610b
LP
2666 if (!f) {
2667 f = fopen(fs, "we");
54b5ba1d
LP
2668 if (!f)
2669 return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
77fa610b
LP
2670 }
2671
604028de 2672 r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
96aa6591 2673 if (r < 0) {
94f344fb
LP
2674 log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
2675 FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
96aa6591 2676 clearerr(f);
27adcc97
LP
2677
2678 /* If we can't turn off a controller, leave it on in the reported resulting mask. This
2679 * happens for example when we attempt to turn off a controller up in the tree that is
2680 * used down in the tree. */
2681 if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
2682 * only here, and not follow the same logic
2683 * for other errors such as EINVAL or
2684 * EOPNOTSUPP or anything else. That's
2685 * because EBUSY indicates that the
2686 * controllers is currently enabled and
2687 * cannot be disabled because something down
2688 * the hierarchy is still using it. Any other
2689 * error most likely means something like "I
2690 * never heard of this controller" or
2691 * similar. In the former case it's hence
2692 * safe to assume the controller is still on
2693 * after the failed operation, while in the
2694 * latter case it's safer to assume the
2695 * controller is unknown and hence certainly
2696 * not enabled. */
2697 ret |= bit;
2698 } else {
2699 /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
2700 if (FLAGS_SET(mask, bit))
2701 ret |= bit;
96aa6591 2702 }
efdb0237
LP
2703 }
2704 }
2705
27adcc97
LP
2706 /* Let's return the precise set of controllers now enabled for the cgroup. */
2707 if (ret_result_mask)
2708 *ret_result_mask = ret;
2709
efdb0237
LP
2710 return 0;
2711}
2712
2713bool cg_is_unified_wanted(void) {
2714 static thread_local int wanted = -1;
415fc41c 2715 int r;
1d84ad94 2716 bool b;
77fab2a9 2717 const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
5f086dc7 2718 _cleanup_free_ char *c = NULL;
efdb0237 2719
77fab2a9 2720 /* If we have a cached value, return that. */
efdb0237
LP
2721 if (wanted >= 0)
2722 return wanted;
2723
239a3d09
ZJS
2724 /* If the hierarchy is already mounted, then follow whatever
2725 * was chosen for it. */
2726 if (cg_unified_flush() >= 0)
b4cccbc1 2727 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
239a3d09 2728
5f086dc7
CD
2729 /* If we were explicitly passed systemd.unified_cgroup_hierarchy,
2730 * respect that. */
1d84ad94 2731 r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
5f086dc7
CD
2732 if (r > 0)
2733 return (wanted = b);
2734
2735 /* If we passed cgroup_no_v1=all with no other instructions, it seems
2736 * highly unlikely that we want to use hybrid or legacy hierarchy. */
2737 r = proc_cmdline_get_key("cgroup_no_v1", 0, &c);
2738 if (r > 0 && streq_ptr(c, "all"))
2739 return (wanted = true);
efdb0237 2740
5f086dc7 2741 return (wanted = is_default);
efdb0237
LP
2742}
2743
2744bool cg_is_legacy_wanted(void) {
239a3d09
ZJS
2745 static thread_local int wanted = -1;
2746
2747 /* If we have a cached value, return that. */
2748 if (wanted >= 0)
2749 return wanted;
2750
4e1dfa45 2751 /* Check if we have cgroup v2 already mounted. */
1b59cf04
ZJS
2752 if (cg_unified_flush() >= 0 &&
2753 unified_cache == CGROUP_UNIFIED_ALL)
239a3d09 2754 return (wanted = false);
1b59cf04
ZJS
2755
2756 /* Otherwise, assume that at least partial legacy is wanted,
4e1dfa45 2757 * since cgroup v2 should already be mounted at this point. */
239a3d09 2758 return (wanted = true);
efdb0237
LP
2759}
2760
a4464b95 2761bool cg_is_hybrid_wanted(void) {
5da38d07 2762 static thread_local int wanted = -1;
415fc41c 2763 int r;
1d84ad94 2764 bool b;
c19739db
ZJS
2765 const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2766 /* We default to true if the default is "hybrid", obviously,
2767 * but also when the default is "unified", because if we get
2768 * called, it means that unified hierarchy was not mounted. */
5da38d07 2769
77fab2a9 2770 /* If we have a cached value, return that. */
5da38d07
TH
2771 if (wanted >= 0)
2772 return wanted;
2773
239a3d09
ZJS
2774 /* If the hierarchy is already mounted, then follow whatever
2775 * was chosen for it. */
2776 if (cg_unified_flush() >= 0 &&
2777 unified_cache == CGROUP_UNIFIED_ALL)
2778 return (wanted = false);
2779
77fab2a9
ZJS
2780 /* Otherwise, let's see what the kernel command line has to say.
2781 * Since checking is expensive, cache a non-error result. */
1d84ad94 2782 r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
5da38d07 2783
2dcb526d
ZJS
2784 /* The meaning of the kernel option is reversed wrt. to the return value
2785 * of this function, hence the negation. */
77fab2a9 2786 return (wanted = r > 0 ? !b : is_default);
5da38d07
TH
2787}
2788
13c31542
TH
2789int cg_weight_parse(const char *s, uint64_t *ret) {
2790 uint64_t u;
2791 int r;
2792
2793 if (isempty(s)) {
2794 *ret = CGROUP_WEIGHT_INVALID;
2795 return 0;
2796 }
2797
2798 r = safe_atou64(s, &u);
2799 if (r < 0)
2800 return r;
2801
2802 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2803 return -ERANGE;
2804
2805 *ret = u;
2806 return 0;
2807}
2808
9be57249
TH
2809const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2810 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2811 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
ac06a0cf
TH
2812 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2813 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
9be57249
TH
2814};
2815
2816static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2817 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2818 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
ac06a0cf
TH
2819 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2820 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
9be57249
TH
2821};
2822
2823DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2824
d53d9474
LP
2825int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2826 uint64_t u;
2827 int r;
2828
2829 if (isempty(s)) {
2830 *ret = CGROUP_CPU_SHARES_INVALID;
2831 return 0;
2832 }
2833
2834 r = safe_atou64(s, &u);
2835 if (r < 0)
2836 return r;
2837
2838 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2839 return -ERANGE;
2840
2841 *ret = u;
2842 return 0;
2843}
2844
2845int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2846 uint64_t u;
2847 int r;
2848
2849 if (isempty(s)) {
2850 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2851 return 0;
2852 }
2853
2854 r = safe_atou64(s, &u);
2855 if (r < 0)
2856 return r;
2857
2858 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2859 return -ERANGE;
2860
2861 *ret = u;
2862 return 0;
2863}
2864
f0bef277
EV
2865bool is_cgroup_fs(const struct statfs *s) {
2866 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2867 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2868}
2869
/* Returns true if the file descriptor refers to an object on a cgroup (v1 or v2) file system, and false
 * otherwise. If the file system cannot be queried the answer is false as well. */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        /* The return type is bool: returning -errno here would be converted to true and make every
         * failing fstatfs() look like a positive match. Report "not a cgroup fs" instead. */
        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2878
b82f71c7 2879static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
efdb0237
LP
2880 [CGROUP_CONTROLLER_CPU] = "cpu",
2881 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
13c31542 2882 [CGROUP_CONTROLLER_IO] = "io",
efdb0237
LP
2883 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2884 [CGROUP_CONTROLLER_MEMORY] = "memory",
3905f127 2885 [CGROUP_CONTROLLER_DEVICES] = "devices",
03a7b521 2886 [CGROUP_CONTROLLER_PIDS] = "pids",
17f14955 2887 [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
084c7007 2888 [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
efdb0237
LP
2889};
2890
2891DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
f98c2585
CD
2892
2893CGroupMask get_cpu_accounting_mask(void) {
2894 static CGroupMask needed_mask = (CGroupMask) -1;
2895
2896 /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
2897 * provided externally from the CPU controller, which means we don't
2898 * need to enable the CPU controller just to get metrics. This is good,
2899 * because enabling the CPU controller comes at a minor performance
2900 * hit, especially when it's propagated deep into large hierarchies.
2901 * There's also no separate CPU accounting controller available within
2902 * a unified hierarchy.
2903 *
2904 * This combination of factors results in the desired cgroup mask to
2905 * enable for CPU accounting varying as follows:
2906 *
2907 * ╔═════════════════════╤═════════════════════╗
2908 * ║ Linux ≥4.15 │ Linux <4.15 ║
2909 * ╔═══════════════╬═════════════════════╪═════════════════════╣
2910 * ║ Unified ║ nothing │ CGROUP_MASK_CPU ║
2911 * ╟───────────────╫─────────────────────┼─────────────────────╢
2912 * ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
2913 * ╚═══════════════╩═════════════════════╧═════════════════════╝
2914 *
2915 * We check kernel version here instead of manually checking whether
2916 * cpu.stat is present for every cgroup, as that check in itself would
2917 * already be fairly expensive.
2918 *
2919 * Kernels where this patch has been backported will therefore have the
2920 * CPU controller enabled unnecessarily. This is more expensive than
2921 * necessary, but harmless. ☺️
2922 */
2923
2924 if (needed_mask == (CGroupMask) -1) {
2925 if (cg_all_unified()) {
2926 struct utsname u;
2927 assert_se(uname(&u) >= 0);
2928
2929 if (str_verscmp(u.release, "4.15") < 0)
2930 needed_mask = CGROUP_MASK_CPU;
2931 else
2932 needed_mask = 0;
2933 } else
2934 needed_mask = CGROUP_MASK_CPUACCT;
2935 }
2936
2937 return needed_mask;
2938}
2939
2940bool cpu_accounting_is_cheap(void) {
2941 return get_cpu_accounting_mask() == 0;
2942}