]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/cgroup-util.c
hexdecoct: make unbase64mem and unhexmem always use SIZE_MAX
[thirdparty/systemd.git] / src / basic / cgroup-util.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
8c6db833
LP
2
3#include <errno.h>
11c3a366 4#include <limits.h>
8c6db833 5#include <signal.h>
11c3a366 6#include <stddef.h>
8c6db833 7#include <stdlib.h>
672c48cc 8#include <sys/types.h>
f98c2585 9#include <sys/utsname.h>
4b58153d 10#include <sys/xattr.h>
84ac7bea 11#include <unistd.h>
8c6db833 12
b5efdb8a 13#include "alloc-util.h"
3ffd4af2 14#include "cgroup-util.h"
28db6fbf 15#include "constants.h"
a0956174 16#include "dirent-util.h"
84ac7bea 17#include "extract-word.h"
3ffd4af2 18#include "fd-util.h"
84ac7bea 19#include "fileio.h"
f97b34a6 20#include "format-util.h"
f4f15635 21#include "fs-util.h"
93cc7779 22#include "log.h"
84ac7bea
LP
23#include "login-util.h"
24#include "macro.h"
f5947a5e 25#include "missing_magic.h"
5545f336 26#include "missing_threads.h"
84ac7bea 27#include "mkdir.h"
6bedfcbb 28#include "parse-util.h"
9eb977db 29#include "path-util.h"
84ac7bea
LP
30#include "process-util.h"
31#include "set.h"
9444b1f2 32#include "special.h"
872a590e 33#include "stat-util.h"
d054f0a4 34#include "stdio-util.h"
8b43440b 35#include "string-table.h"
07630cea 36#include "string-util.h"
aae7e17f 37#include "strv.h"
84ac7bea 38#include "unit-name.h"
b1d4f8e1 39#include "user-util.h"
baa358df 40#include "xattr-util.h"
8c6db833 41
aef43552 42static int cg_enumerate_items(const char *controller, const char *path, FILE **ret, const char *item) {
7027ff61 43 _cleanup_free_ char *fs = NULL;
c6c18be3 44 FILE *f;
7027ff61 45 int r;
c6c18be3 46
aef43552 47 assert(ret);
c6c18be3 48
e48fcfef 49 r = cg_get_path(controller, path, item, &fs);
c3175a7f 50 if (r < 0)
c6c18be3
LP
51 return r;
52
53 f = fopen(fs, "re");
c6c18be3
LP
54 if (!f)
55 return -errno;
56
aef43552 57 *ret = f;
c6c18be3
LP
58 return 0;
59}
60
aef43552
LP
/* Opens the "cgroup.procs" attribute of the specified cgroup for reading; see cg_enumerate_items() for
 * the return value contract. */
int cg_enumerate_processes(const char *controller, const char *path, FILE **ret) {
        static const char procs_attribute[] = "cgroup.procs";

        return cg_enumerate_items(controller, path, ret, procs_attribute);
}
64
aef43552 65int cg_read_pid(FILE *f, pid_t *ret) {
c6c18be3
LP
66 unsigned long ul;
67
aef43552 68 /* Note that the cgroup.procs might contain duplicates! See cgroups.txt for details. */
c6c18be3 69
7027ff61 70 assert(f);
aef43552 71 assert(ret);
7027ff61 72
c6c18be3
LP
73 errno = 0;
74 if (fscanf(f, "%lu", &ul) != 1) {
75
8ab40789
LP
76 if (feof(f)) {
77 *ret = 0;
c6c18be3 78 return 0;
8ab40789 79 }
c6c18be3 80
66855de7 81 return errno_or_else(EIO);
c6c18be3
LP
82 }
83
84 if (ul <= 0)
85 return -EIO;
5ed7cdfb
LP
86 if (ul > PID_T_MAX)
87 return -EIO;
c6c18be3 88
aef43552 89 *ret = (pid_t) ul;
c6c18be3
LP
90 return 1;
91}
92
12c7d27b
LP
93int cg_read_pidref(FILE *f, PidRef *ret) {
94 int r;
95
96 assert(f);
97 assert(ret);
98
99 for (;;) {
100 pid_t pid;
101
102 r = cg_read_pid(f, &pid);
103 if (r < 0)
104 return r;
105 if (r == 0) {
106 *ret = PIDREF_NULL;
107 return 0;
108 }
109
110 r = pidref_set_pid(ret, pid);
111 if (r >= 0)
112 return 1;
113 if (r != -ESRCH)
114 return r;
115
116 /* ESRCH → gone by now? just skip over it, read the next */
117 }
118}
119
8b238b13
LP
120int cg_read_event(
121 const char *controller,
122 const char *path,
123 const char *event,
31a9be23 124 char **ret) {
8b238b13 125
ab2c3861 126 _cleanup_free_ char *events = NULL, *content = NULL;
ab2c3861
TH
127 int r;
128
129 r = cg_get_path(controller, path, "cgroup.events", &events);
130 if (r < 0)
131 return r;
132
627055ce 133 r = read_full_virtual_file(events, &content, NULL);
ab2c3861
TH
134 if (r < 0)
135 return r;
136
31a9be23
YW
137 for (const char *p = content;;) {
138 _cleanup_free_ char *line = NULL, *key = NULL, *val = NULL;
139 const char *q;
140
141 r = extract_first_word(&p, &line, "\n", 0);
142 if (r < 0)
143 return r;
144 if (r == 0)
145 return -ENOENT;
146
147 q = line;
148 r = extract_first_word(&q, &key, " ", 0);
149 if (r < 0)
150 return r;
151 if (r == 0)
ab2c3861
TH
152 return -EINVAL;
153
31a9be23 154 if (!streq(key, event))
ab2c3861
TH
155 continue;
156
31a9be23
YW
157 val = strdup(q);
158 if (!val)
159 return -ENOMEM;
160
161 *ret = TAKE_PTR(val);
ab2c3861
TH
162 return 0;
163 }
ab2c3861
TH
164}
165
3228995c
CB
166bool cg_ns_supported(void) {
167 static thread_local int enabled = -1;
168
169 if (enabled >= 0)
170 return enabled;
171
0887fa71
LP
172 if (access("/proc/self/ns/cgroup", F_OK) < 0) {
173 if (errno != ENOENT)
174 log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
175 enabled = false;
176 } else
177 enabled = true;
3228995c
CB
178
179 return enabled;
180}
181
d9e45bc3
MS
182bool cg_freezer_supported(void) {
183 static thread_local int supported = -1;
184
185 if (supported >= 0)
186 return supported;
187
188 supported = cg_all_unified() > 0 && access("/sys/fs/cgroup/init.scope/cgroup.freeze", F_OK) == 0;
189
190 return supported;
191}
192
8a513eee
AB
193bool cg_kill_supported(void) {
194 static thread_local int supported = -1;
195
196 if (supported >= 0)
197 return supported;
198
199 if (cg_all_unified() <= 0)
200 supported = false;
201 else if (access("/sys/fs/cgroup/init.scope/cgroup.kill", F_OK) < 0) {
202 if (errno != ENOENT)
203 log_debug_errno(errno, "Failed to check if cgroup.kill is available, assuming not: %m");
204 supported = false;
205 } else
206 supported = true;
207
208 return supported;
209}
210
aef43552 211int cg_enumerate_subgroups(const char *controller, const char *path, DIR **ret) {
7027ff61 212 _cleanup_free_ char *fs = NULL;
35d2e7ec 213 DIR *d;
aef43552 214 int r;
35d2e7ec 215
aef43552 216 assert(ret);
35d2e7ec
LP
217
218 /* This is not recursive! */
219
c3175a7f
LP
220 r = cg_get_path(controller, path, NULL, &fs);
221 if (r < 0)
35d2e7ec
LP
222 return r;
223
224 d = opendir(fs);
35d2e7ec
LP
225 if (!d)
226 return -errno;
227
aef43552 228 *ret = d;
35d2e7ec
LP
229 return 0;
230}
231
aef43552 232int cg_read_subgroup(DIR *d, char **ret) {
35d2e7ec 233 assert(d);
aef43552 234 assert(ret);
35d2e7ec 235
f01327ad 236 FOREACH_DIRENT_ALL(de, d, return -errno) {
35d2e7ec
LP
237 char *b;
238
239 if (de->d_type != DT_DIR)
240 continue;
241
49bfc877 242 if (dot_or_dot_dot(de->d_name))
35d2e7ec
LP
243 continue;
244
7027ff61
LP
245 b = strdup(de->d_name);
246 if (!b)
35d2e7ec
LP
247 return -ENOMEM;
248
aef43552 249 *ret = b;
35d2e7ec
LP
250 return 1;
251 }
252
8ab40789 253 *ret = NULL;
35d2e7ec
LP
254 return 0;
255}
256
4ad49000 257int cg_rmdir(const char *controller, const char *path) {
7027ff61 258 _cleanup_free_ char *p = NULL;
35d2e7ec
LP
259 int r;
260
ad293f5a
LP
261 r = cg_get_path(controller, path, NULL, &p);
262 if (r < 0)
35d2e7ec
LP
263 return r;
264
265 r = rmdir(p);
7027ff61
LP
266 if (r < 0 && errno != ENOENT)
267 return -errno;
35d2e7ec 268
b4cccbc1 269 r = cg_hybrid_unified();
f20db199 270 if (r <= 0)
b4cccbc1 271 return r;
b4cccbc1
LP
272
273 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
274 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
275 if (r < 0)
276 log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
277 }
278
7027ff61 279 return 0;
35d2e7ec
LP
280}
281
e48fcfef 282static int cg_kill_items(
1d98fef1
LP
283 const char *path,
284 int sig,
285 CGroupFlags flags,
286 Set *s,
287 cg_kill_log_func_t log_kill,
e48fcfef
TM
288 void *userdata,
289 const char *item) {
1d98fef1 290
7027ff61 291 _cleanup_set_free_ Set *allocated_set = NULL;
35d2e7ec 292 bool done = false;
c53d2d54 293 int r, ret = 0, ret_log_kill = 0;
8c6db833 294
8c6db833
LP
295 assert(sig >= 0);
296
0d5b4810
LP
297 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
298 * SIGCONT on SIGKILL. */
299 if (IN_SET(sig, SIGCONT, SIGKILL))
300 flags &= ~CGROUP_SIGCONT;
301
8c6db833
LP
302 /* This goes through the tasks list and kills them all. This
303 * is repeated until no further processes are added to the
304 * tasks list, to properly handle forking processes */
305
7027ff61 306 if (!s) {
d5099efc 307 s = allocated_set = set_new(NULL);
7027ff61 308 if (!s)
ca949c9d 309 return -ENOMEM;
7027ff61 310 }
8c6db833 311
8c6db833 312 do {
7027ff61 313 _cleanup_fclose_ FILE *f = NULL;
8c6db833
LP
314 done = true;
315
bd1791b5 316 r = cg_enumerate_items(SYSTEMD_CGROUP_CONTROLLER, path, &f, item);
f65a40fb
MY
317 if (r == -ENOENT)
318 break;
319 if (r < 0)
320 return RET_GATHER(ret, r);
c6c18be3 321
4d1b2df1
LP
322 for (;;) {
323 _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
324
325 r = cg_read_pidref(f, &pidref);
326 if (r < 0)
f65a40fb 327 return RET_GATHER(ret, r);
4d1b2df1
LP
328 if (r == 0)
329 break;
8c6db833 330
4d1b2df1 331 if ((flags & CGROUP_IGNORE_SELF) && pidref_is_self(&pidref))
c6c18be3 332 continue;
8c6db833 333
4d1b2df1 334 if (set_get(s, PID_TO_PTR(pidref.pid)) == PID_TO_PTR(pidref.pid))
c6c18be3 335 continue;
8c6db833 336
1d98fef1 337 if (log_kill)
4d1b2df1 338 ret_log_kill = log_kill(&pidref, sig, userdata);
1d98fef1 339
4d1b2df1
LP
340 /* If we haven't killed this process yet, kill it */
341 r = pidref_kill(&pidref, sig);
f65a40fb
MY
342 if (r < 0 && r != -ESRCH)
343 RET_GATHER(ret, r);
344 if (r >= 0) {
1d98fef1 345 if (flags & CGROUP_SIGCONT)
4d1b2df1 346 (void) pidref_kill(&pidref, SIGCONT);
430c18ed 347
c53d2d54
DB
348 if (ret == 0) {
349 if (log_kill)
350 ret = ret_log_kill;
351 else
352 ret = 1;
353 }
430c18ed 354 }
8c6db833 355
8c6db833
LP
356 done = false;
357
2d790175 358 r = set_put(s, PID_TO_PTR(pidref.pid));
f65a40fb
MY
359 if (r < 0)
360 return RET_GATHER(ret, r);
35d2e7ec
LP
361 }
362
f65a40fb
MY
363 /* To avoid racing against processes which fork quicker than we can kill them, we repeat this
364 * until no new pids need to be killed. */
8c6db833 365
35d2e7ec 366 } while (!done);
8c6db833 367
35d2e7ec 368 return ret;
8c6db833
LP
369}
370
e48fcfef 371int cg_kill(
e48fcfef
TM
372 const char *path,
373 int sig,
374 CGroupFlags flags,
375 Set *s,
376 cg_kill_log_func_t log_kill,
377 void *userdata) {
37f0289b 378
379 int r, ret;
e48fcfef 380
bd1791b5 381 r = cg_kill_items(path, sig, flags, s, log_kill, userdata, "cgroup.procs");
e48fcfef
TM
382 if (r < 0 || sig != SIGKILL)
383 return r;
384
37f0289b 385 ret = r;
386
e48fcfef 387 /* Only in case of killing with SIGKILL and when using cgroupsv2, kill remaining threads manually as
cda5ccdb
TM
388 a workaround for kernel bug. It was fixed in 5.2-rc5 (c03cd7738a83), backported to 4.19.66
389 (4340d175b898) and 4.14.138 (feb6b123b7dd). */
bd1791b5 390 r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
37f0289b 391 if (r < 0)
392 return r;
393 if (r == 0)
394 return ret;
395
bd1791b5 396 r = cg_kill_items(path, sig, flags, s, log_kill, userdata, "cgroup.threads");
37f0289b 397 if (r < 0)
e48fcfef 398 return r;
e48fcfef 399
37f0289b 400 return r > 0 || ret > 0;
e48fcfef
TM
401}
402
bd1791b5
LP
403int cg_kill_kernel_sigkill(const char *path) {
404 /* Kills the cgroup at `path` directly by writing to its cgroup.kill file. This sends SIGKILL to all
405 * processes in the cgroup and has the advantage of being completely atomic, unlike cg_kill_items(). */
406
8a513eee 407 _cleanup_free_ char *killfile = NULL;
bd1791b5 408 int r;
8a513eee
AB
409
410 assert(path);
411
412 if (!cg_kill_supported())
413 return -EOPNOTSUPP;
414
bd1791b5 415 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.kill", &killfile);
8a513eee
AB
416 if (r < 0)
417 return r;
418
419 r = write_string_file(killfile, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
420 if (r < 0)
421 return r;
422
423 return 0;
424}
425
1d98fef1 426int cg_kill_recursive(
1d98fef1
LP
427 const char *path,
428 int sig,
429 CGroupFlags flags,
430 Set *s,
431 cg_kill_log_func_t log_kill,
432 void *userdata) {
433
e155a0aa 434 int r, ret;
8c6db833
LP
435
436 assert(path);
8c6db833
LP
437 assert(sig >= 0);
438
8a513eee 439 if (sig == SIGKILL && cg_kill_supported() &&
bd1791b5 440 !FLAGS_SET(flags, CGROUP_IGNORE_SELF) && !s && !log_kill)
8a513eee 441 /* ignore CGROUP_SIGCONT, since this is a no-op alongside SIGKILL */
bd1791b5
LP
442 ret = cg_kill_kernel_sigkill(path);
443 else {
444 _cleanup_set_free_ Set *allocated_set = NULL;
445 _cleanup_closedir_ DIR *d = NULL;
446
8a513eee
AB
447 if (!s) {
448 s = allocated_set = set_new(NULL);
449 if (!s)
450 return -ENOMEM;
451 }
ca949c9d 452
bd1791b5 453 ret = cg_kill(path, sig, flags, s, log_kill, userdata);
8c6db833 454
bd1791b5 455 r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
8a513eee 456 if (r < 0) {
bd1791b5
LP
457 if (r != -ENOENT)
458 RET_GATHER(ret, r);
8c6db833 459
8a513eee
AB
460 return ret;
461 }
8c6db833 462
bd1791b5
LP
463 for (;;) {
464 _cleanup_free_ char *fn = NULL, *p = NULL;
465
466 r = cg_read_subgroup(d, &fn);
467 if (r < 0) {
468 RET_GATHER(ret, r);
469 break;
470 }
471 if (r == 0)
472 break;
8c6db833 473
8a513eee 474 p = path_join(empty_to_root(path), fn);
8a513eee
AB
475 if (!p)
476 return -ENOMEM;
8c6db833 477
bd1791b5 478 r = cg_kill_recursive(p, sig, flags, s, log_kill, userdata);
8a513eee
AB
479 if (r != 0 && ret >= 0)
480 ret = r;
481 }
8c6db833 482 }
35d2e7ec 483
8a513eee 484 if (FLAGS_SET(flags, CGROUP_REMOVE)) {
bd1791b5
LP
485 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER, path);
486 if (!IN_SET(r, -ENOENT, -EBUSY))
487 RET_GATHER(ret, r);
7027ff61 488 }
ca949c9d 489
8c6db833
LP
490 return ret;
491}
492
efdb0237 493static const char *controller_to_dirname(const char *controller) {
7027ff61
LP
494 assert(controller);
495
a561253f
LP
496 /* Converts a controller name to the directory name below /sys/fs/cgroup/ we want to mount it
497 * to. Effectively, this just cuts off the name= prefixed used for named hierarchies, if it is
498 * specified. */
efdb0237 499
2977724b 500 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
b4cccbc1 501 if (cg_hybrid_unified() > 0)
2977724b
TH
502 controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
503 else
504 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
505 }
b6629c4b 506
a561253f 507 return startswith(controller, "name=") ?: controller;
3474ae3c
LP
508}
509
/* Builds "/sys/fs/cgroup/<controller dir>[/<path>][/<suffix>]". Empty or NULL 'path' and 'suffix' are
 * left out. Returns 0 and the newly allocated string in *ret, or -ENOMEM. */
static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **ret) {
        bool with_path, with_suffix;
        const char *dir;
        char *joined;

        assert(ret);
        assert(controller);

        dir = controller_to_dirname(controller);
        with_path = !isempty(path);
        with_suffix = !isempty(suffix);

        if (with_path && with_suffix)
                joined = path_join("/sys/fs/cgroup", dir, path, suffix);
        else if (with_path)
                joined = path_join("/sys/fs/cgroup", dir, path);
        else if (with_suffix)
                joined = path_join("/sys/fs/cgroup", dir, suffix);
        else
                joined = path_join("/sys/fs/cgroup", dir);
        if (!joined)
                return -ENOMEM;

        *ret = joined;
        return 0;
}
533
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]". Empty or NULL 'path' and 'suffix' are left out.
 * Returns 0 and the newly allocated string in *ret, or -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **ret) {
        bool with_path, with_suffix;
        char *joined;

        assert(ret);

        with_path = !isempty(path);
        with_suffix = !isempty(suffix);

        if (with_path && with_suffix)
                joined = path_join("/sys/fs/cgroup", path, suffix);
        else if (with_path)
                joined = path_join("/sys/fs/cgroup", path);
        else if (with_suffix)
                joined = path_join("/sys/fs/cgroup", suffix);
        else
                joined = strdup("/sys/fs/cgroup");
        if (!joined)
                return -ENOMEM;

        *ret = joined;
        return 0;
}
553
/* Builds the path for the cgroup 'path' of 'controller', with the optional 'suffix' (typically an
 * attribute name) appended, and returns it simplified in *ret.
 *
 * If 'controller' is NULL, only 'path' and 'suffix' are combined, without any /sys/fs/cgroup prefix; in
 * that case at least one of the two must be non-empty.
 *
 * Returns 0 on success, -EINVAL on invalid arguments, -ENOMEM on allocation failure, or the error
 * returned by cg_all_unified(). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **ret) {
        int r;

        assert(ret);

        if (!controller) {
                bool no_path = isempty(path), no_suffix = isempty(suffix);
                char *joined;

                /* If no controller is specified, we return the path *below* the controllers, without
                 * any prefix. */

                if (no_path && no_suffix)
                        return -EINVAL;

                if (no_suffix)
                        joined = strdup(path);
                else if (no_path)
                        joined = strdup(suffix);
                else
                        joined = path_join(path, suffix);
                if (!joined)
                        return -ENOMEM;

                *ret = path_simplify(joined);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified();
        if (r < 0)
                return r;

        r = r > 0 ?
                join_path_unified(path, suffix, ret) :
                join_path_legacy(controller, path, suffix, ret);
        if (r < 0)
                return r;

        path_simplify(*ret);
        return 0;
}
dbd821ac 597
0fa7b500
MK
/* Checks whether the hierarchy of 'controller' below /sys/fs/cgroup/ is accessible, returning the
 * result of laccess().
 *
 * If root is specified, we check that:
 * - possible subcgroup is created at root,
 * - we can modify the hierarchy. */
static int controller_is_v1_accessible(const char *root, const char *controller) {
        const char *probe, *dir, *procs_suffix;
        int mode;

        assert(controller);

        dir = controller_to_dirname(controller);

        procs_suffix = root ? "/cgroup.procs" : NULL;
        mode = root ? W_OK : F_OK;

        probe = strjoina("/sys/fs/cgroup/", dir, root, procs_suffix);
        return laccess(probe, mode);
}
612
820fe745 613int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **ret) {
37099707 614 int r;
dbd821ac 615
efdb0237 616 assert(controller);
820fe745 617 assert(ret);
70132bd0 618
81504017
MK
619 if (!cg_controller_is_valid(controller))
620 return -EINVAL;
621
622 r = cg_all_unified();
37099707
LP
623 if (r < 0)
624 return r;
81504017
MK
625 if (r > 0) {
626 /* In the unified hierarchy all controllers are considered accessible,
627 * except for the named hierarchies */
628 if (startswith(controller, "name="))
629 return -EOPNOTSUPP;
630 } else {
631 /* Check if the specified controller is actually accessible */
0fa7b500 632 r = controller_is_v1_accessible(NULL, controller);
81504017
MK
633 if (r < 0)
634 return r;
635 }
3474ae3c 636
820fe745 637 return cg_get_path(controller, path, suffix, ret);
8c6db833
LP
638}
639
bd1791b5 640int cg_set_xattr(const char *path, const char *name, const void *value, size_t size, int flags) {
4b58153d
LP
641 _cleanup_free_ char *fs = NULL;
642 int r;
643
644 assert(path);
645 assert(name);
646 assert(value || size <= 0);
647
bd1791b5 648 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
4b58153d
LP
649 if (r < 0)
650 return r;
651
7c248223 652 return RET_NERRNO(setxattr(fs, name, value, size, flags));
4b58153d
LP
653}
654
bd1791b5 655int cg_get_xattr(const char *path, const char *name, void *value, size_t size) {
4b58153d
LP
656 _cleanup_free_ char *fs = NULL;
657 ssize_t n;
658 int r;
659
660 assert(path);
661 assert(name);
662
bd1791b5 663 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
4b58153d
LP
664 if (r < 0)
665 return r;
666
667 n = getxattr(fs, name, value, size);
668 if (n < 0)
669 return -errno;
670
671 return (int) n;
672}
673
bd1791b5 674int cg_get_xattr_malloc(const char *path, const char *name, char **ret) {
baa358df
AZ
675 _cleanup_free_ char *fs = NULL;
676 int r;
677
678 assert(path);
679 assert(name);
680
bd1791b5 681 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
baa358df
AZ
682 if (r < 0)
683 return r;
684
bd1791b5 685 return lgetxattr_malloc(fs, name, ret);
baa358df
AZ
686}
687
bd1791b5 688int cg_get_xattr_bool(const char *path, const char *name) {
f0b8ac9e 689 _cleanup_free_ char *fs = NULL;
59331b8e
AZ
690 int r;
691
692 assert(path);
693 assert(name);
694
f0b8ac9e 695 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
59331b8e
AZ
696 if (r < 0)
697 return r;
698
f0b8ac9e 699 return getxattr_at_bool(AT_FDCWD, fs, name, /* flags= */ 0);
59331b8e
AZ
700}
701
bd1791b5 702int cg_remove_xattr(const char *path, const char *name) {
bf25f165
LP
703 _cleanup_free_ char *fs = NULL;
704 int r;
705
706 assert(path);
707 assert(name);
708
bd1791b5 709 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
bf25f165
LP
710 if (r < 0)
711 return r;
712
7c248223 713 return RET_NERRNO(removexattr(fs, name));
bf25f165
LP
714}
715
ae7ef63f 716int cg_pid_get_path(const char *controller, pid_t pid, char **ret_path) {
7027ff61 717 _cleanup_fclose_ FILE *f = NULL;
7756528e 718 const char *fs, *controller_str = NULL; /* avoid false maybe-uninitialized warning */
d2b39cb6 719 int unified, r;
8c6db833 720
c6c18be3 721 assert(pid >= 0);
ae7ef63f 722 assert(ret_path);
8c6db833 723
5da38d07
TH
724 if (controller) {
725 if (!cg_controller_is_valid(controller))
726 return -EINVAL;
727 } else
728 controller = SYSTEMD_CGROUP_CONTROLLER;
729
c22800e4 730 unified = cg_unified_controller(controller);
b4cccbc1
LP
731 if (unified < 0)
732 return unified;
733 if (unified == 0) {
b6629c4b
TH
734 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
735 controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
736 else
737 controller_str = controller;
b6629c4b 738 }
7027ff61 739
b68fa010 740 fs = procfs_file_alloca(pid, "cgroup");
fdeea3f4
ZJS
741 r = fopen_unlocked(fs, "re", &f);
742 if (r == -ENOENT)
743 return -ESRCH;
744 if (r < 0)
745 return r;
35bbbf85 746
d2b39cb6
LP
747 for (;;) {
748 _cleanup_free_ char *line = NULL;
ae7ef63f 749 char *e;
c6c18be3 750
d2b39cb6
LP
751 r = read_line(f, LONG_LINE_MAX, &line);
752 if (r < 0)
753 return r;
754 if (r == 0)
ae7ef63f 755 return -ENODATA;
c6c18be3 756
efdb0237
LP
757 if (unified) {
758 e = startswith(line, "0:");
759 if (!e)
760 continue;
c6c18be3 761
efdb0237
LP
762 e = strchr(e, ':');
763 if (!e)
764 continue;
765 } else {
766 char *l;
efdb0237
LP
767
768 l = strchr(line, ':');
769 if (!l)
770 continue;
8af8afd6 771
efdb0237
LP
772 l++;
773 e = strchr(l, ':');
774 if (!e)
775 continue;
efdb0237 776 *e = 0;
ae7ef63f 777
bc20c31b 778 assert(controller_str);
ae7ef63f
ZJS
779 r = string_contains_word(l, ",", controller_str);
780 if (r < 0)
781 return r;
782 if (r == 0)
efdb0237 783 continue;
8af8afd6
LP
784 }
785
ae7ef63f
ZJS
786 char *path = strdup(e + 1);
787 if (!path)
7027ff61 788 return -ENOMEM;
c6c18be3 789
5e20b0a4 790 /* Truncate suffix indicating the process is a zombie */
ae7ef63f 791 e = endswith(path, " (deleted)");
5e20b0a4
LP
792 if (e)
793 *e = 0;
794
ae7ef63f 795 *ret_path = path;
7027ff61 796 return 0;
c6c18be3 797 }
8c6db833
LP
798}
799
f2a2e60b 800int cg_pidref_get_path(const char *controller, const PidRef *pidref, char **ret_path) {
a9062242
LP
801 _cleanup_free_ char *path = NULL;
802 int r;
803
804 assert(ret_path);
805
806 if (!pidref_is_set(pidref))
807 return -ESRCH;
808
809 r = cg_pid_get_path(controller, pidref->pid, &path);
810 if (r < 0)
811 return r;
812
813 /* Before we return the path, make sure the procfs entry for this pid still matches the pidref */
814 r = pidref_verify(pidref);
815 if (r < 0)
816 return r;
817
818 *ret_path = TAKE_PTR(path);
819 return 0;
820}
821
8c6db833 822int cg_install_release_agent(const char *controller, const char *agent) {
7027ff61 823 _cleanup_free_ char *fs = NULL, *contents = NULL;
efdb0237 824 const char *sc;
415fc41c 825 int r;
8c6db833 826
8c6db833
LP
827 assert(agent);
828
c22800e4 829 r = cg_unified_controller(controller);
b4cccbc1
LP
830 if (r < 0)
831 return r;
832 if (r > 0) /* doesn't apply to unified hierarchy */
efdb0237
LP
833 return -EOPNOTSUPP;
834
7027ff61
LP
835 r = cg_get_path(controller, NULL, "release_agent", &fs);
836 if (r < 0)
c6c18be3 837 return r;
8c6db833 838
7027ff61
LP
839 r = read_one_line_file(fs, &contents);
840 if (r < 0)
841 return r;
8c6db833
LP
842
843 sc = strstrip(contents);
e155a0aa 844 if (isempty(sc)) {
604028de 845 r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
574d5f2d 846 if (r < 0)
7027ff61 847 return r;
b8725df8 848 } else if (!path_equal(sc, agent))
7027ff61 849 return -EEXIST;
8c6db833 850
0da16248 851 fs = mfree(fs);
7027ff61
LP
852 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
853 if (r < 0)
854 return r;
8c6db833 855
0da16248 856 contents = mfree(contents);
7027ff61
LP
857 r = read_one_line_file(fs, &contents);
858 if (r < 0)
859 return r;
8c6db833
LP
860
861 sc = strstrip(contents);
8c6db833 862 if (streq(sc, "0")) {
604028de 863 r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
7027ff61
LP
864 if (r < 0)
865 return r;
c6c18be3 866
7027ff61
LP
867 return 1;
868 }
8c6db833 869
7027ff61
LP
870 if (!streq(sc, "1"))
871 return -EIO;
8c6db833 872
7027ff61 873 return 0;
8c6db833
LP
874}
875
ad929bcc
KS
876int cg_uninstall_release_agent(const char *controller) {
877 _cleanup_free_ char *fs = NULL;
415fc41c 878 int r;
efdb0237 879
c22800e4 880 r = cg_unified_controller(controller);
b4cccbc1
LP
881 if (r < 0)
882 return r;
883 if (r > 0) /* Doesn't apply to unified hierarchy */
efdb0237 884 return -EOPNOTSUPP;
ad929bcc 885
ac9ef333
LP
886 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
887 if (r < 0)
888 return r;
889
604028de 890 r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
ac9ef333
LP
891 if (r < 0)
892 return r;
893
0da16248 894 fs = mfree(fs);
ac9ef333 895
ad929bcc
KS
896 r = cg_get_path(controller, NULL, "release_agent", &fs);
897 if (r < 0)
898 return r;
899
604028de 900 r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER);
ad929bcc
KS
901 if (r < 0)
902 return r;
903
ac9ef333 904 return 0;
ad929bcc
KS
905}
906
6f883237 907int cg_is_empty(const char *controller, const char *path) {
7027ff61 908 _cleanup_fclose_ FILE *f = NULL;
efdb0237 909 pid_t pid;
7027ff61 910 int r;
8c6db833 911
8c6db833
LP
912 assert(path);
913
b043cd0b 914 r = cg_enumerate_processes(controller, path, &f);
6f883237 915 if (r == -ENOENT)
1bcf3fc6 916 return true;
c3175a7f 917 if (r < 0)
6f883237 918 return r;
8c6db833 919
6f883237 920 r = cg_read_pid(f, &pid);
c6c18be3
LP
921 if (r < 0)
922 return r;
8c6db833 923
6f883237 924 return r == 0;
8c6db833
LP
925}
926
6f883237 927int cg_is_empty_recursive(const char *controller, const char *path) {
415fc41c 928 int r;
8c6db833 929
8c6db833
LP
930 assert(path);
931
6fd66507 932 /* The root cgroup is always populated */
57ea45e1 933 if (controller && empty_or_root(path))
efdb0237 934 return false;
6fd66507 935
c22800e4 936 r = cg_unified_controller(controller);
b4cccbc1
LP
937 if (r < 0)
938 return r;
939 if (r > 0) {
ab2c3861 940 _cleanup_free_ char *t = NULL;
8c6db833 941
efdb0237 942 /* On the unified hierarchy we can check empty state
ab2c3861 943 * via the "populated" attribute of "cgroup.events". */
8c6db833 944
ab2c3861 945 r = cg_read_event(controller, path, "populated", &t);
1bcf3fc6
ZJS
946 if (r == -ENOENT)
947 return true;
efdb0237
LP
948 if (r < 0)
949 return r;
950
951 return streq(t, "0");
952 } else {
953 _cleanup_closedir_ DIR *d = NULL;
954 char *fn;
8c6db833 955
efdb0237 956 r = cg_is_empty(controller, path);
35d2e7ec 957 if (r <= 0)
7027ff61 958 return r;
35d2e7ec 959
efdb0237
LP
960 r = cg_enumerate_subgroups(controller, path, &d);
961 if (r == -ENOENT)
1bcf3fc6 962 return true;
efdb0237
LP
963 if (r < 0)
964 return r;
35d2e7ec 965
efdb0237
LP
966 while ((r = cg_read_subgroup(d, &fn)) > 0) {
967 _cleanup_free_ char *p = NULL;
968
657ee2d8 969 p = path_join(path, fn);
efdb0237
LP
970 free(fn);
971 if (!p)
972 return -ENOMEM;
973
974 r = cg_is_empty_recursive(controller, p);
975 if (r <= 0)
976 return r;
977 }
978 if (r < 0)
979 return r;
980
981 return true;
982 }
35d2e7ec
LP
983}
984
2a8020fe
ZJS
985int cg_split_spec(const char *spec, char **ret_controller, char **ret_path) {
986 _cleanup_free_ char *controller = NULL, *path = NULL;
660087dc 987 int r;
35d2e7ec
LP
988
989 assert(spec);
35d2e7ec
LP
990
991 if (*spec == '/') {
99be45a4 992 if (!path_is_normalized(spec))
e884315e 993 return -EINVAL;
35d2e7ec 994
2a8020fe 995 if (ret_path) {
660087dc
ZJS
996 r = path_simplify_alloc(spec, &path);
997 if (r < 0)
998 return r;
8c6db833
LP
999 }
1000
2a8020fe
ZJS
1001 } else {
1002 const char *e;
35d2e7ec 1003
2a8020fe
ZJS
1004 e = strchr(spec, ':');
1005 if (e) {
1006 controller = strndup(spec, e-spec);
1007 if (!controller)
35d2e7ec 1008 return -ENOMEM;
2a8020fe
ZJS
1009 if (!cg_controller_is_valid(controller))
1010 return -EINVAL;
35d2e7ec 1011
2a8020fe
ZJS
1012 if (!isempty(e + 1)) {
1013 path = strdup(e+1);
1014 if (!path)
1015 return -ENOMEM;
35d2e7ec 1016
2a8020fe
ZJS
1017 if (!path_is_normalized(path) ||
1018 !path_is_absolute(path))
1019 return -EINVAL;
8c6db833 1020
4ff361cc 1021 path_simplify(path);
2a8020fe 1022 }
246aa6dd 1023
2a8020fe
ZJS
1024 } else {
1025 if (!cg_controller_is_valid(spec))
1026 return -EINVAL;
35d2e7ec 1027
2a8020fe
ZJS
1028 if (ret_controller) {
1029 controller = strdup(spec);
1030 if (!controller)
1031 return -ENOMEM;
1032 }
baa89da4 1033 }
baa89da4 1034 }
5954c074 1035
2a8020fe
ZJS
1036 if (ret_controller)
1037 *ret_controller = TAKE_PTR(controller);
1038 if (ret_path)
1039 *ret_path = TAKE_PTR(path);
35d2e7ec 1040 return 0;
8c6db833 1041}
c6c18be3 1042
820fe745 1043int cg_mangle_path(const char *path, char **ret) {
78edb35a 1044 _cleanup_free_ char *c = NULL, *p = NULL;
35d2e7ec
LP
1045 int r;
1046
1047 assert(path);
820fe745 1048 assert(ret);
35d2e7ec 1049
73e231ab 1050 /* First, check if it already is a filesystem path */
660087dc 1051 if (path_startswith(path, "/sys/fs/cgroup"))
820fe745 1052 return path_simplify_alloc(path, ret);
35d2e7ec 1053
73e231ab 1054 /* Otherwise, treat it as cg spec */
b69d29ce
LP
1055 r = cg_split_spec(path, &c, &p);
1056 if (r < 0)
35d2e7ec
LP
1057 return r;
1058
820fe745 1059 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, ret);
35d2e7ec 1060}
1f73f0f1 1061
820fe745 1062int cg_get_root_path(char **ret_path) {
9444b1f2 1063 char *p, *e;
7027ff61
LP
1064 int r;
1065
820fe745 1066 assert(ret_path);
7027ff61 1067
9444b1f2 1068 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
7027ff61
LP
1069 if (r < 0)
1070 return r;
1071
efdb0237
LP
1072 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1073 if (!e)
1074 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1075 if (!e)
1076 e = endswith(p, "/system"); /* even more legacy */
9444b1f2 1077 if (e)
7027ff61
LP
1078 *e = 0;
1079
820fe745 1080 *ret_path = p;
1f73f0f1
LP
1081 return 0;
1082}
b59e2465 1083
820fe745 1084int cg_shift_path(const char *cgroup, const char *root, const char **ret_shifted) {
751bc6ac
LP
1085 _cleanup_free_ char *rt = NULL;
1086 char *p;
ba1261bc
LP
1087 int r;
1088
e9174f29 1089 assert(cgroup);
820fe745 1090 assert(ret_shifted);
e9174f29
LP
1091
1092 if (!root) {
1093 /* If the root was specified let's use that, otherwise
1094 * let's determine it from PID 1 */
1095
751bc6ac 1096 r = cg_get_root_path(&rt);
e9174f29
LP
1097 if (r < 0)
1098 return r;
1099
751bc6ac 1100 root = rt;
e9174f29 1101 }
ba1261bc 1102
751bc6ac 1103 p = path_startswith(cgroup, root);
efdb0237 1104 if (p && p > cgroup)
820fe745 1105 *ret_shifted = p - 1;
751bc6ac 1106 else
820fe745 1107 *ret_shifted = cgroup;
751bc6ac
LP
1108
1109 return 0;
1110}
1111
820fe745 1112int cg_pid_get_path_shifted(pid_t pid, const char *root, char **ret_cgroup) {
751bc6ac
LP
1113 _cleanup_free_ char *raw = NULL;
1114 const char *c;
1115 int r;
1116
1117 assert(pid >= 0);
820fe745 1118 assert(ret_cgroup);
751bc6ac
LP
1119
1120 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
7027ff61 1121 if (r < 0)
ba1261bc 1122 return r;
ba1261bc 1123
751bc6ac
LP
1124 r = cg_shift_path(raw, root, &c);
1125 if (r < 0)
1126 return r;
ba1261bc 1127
ae2a15bc 1128 if (c == raw)
820fe745 1129 *ret_cgroup = TAKE_PTR(raw);
ae2a15bc 1130 else {
751bc6ac 1131 char *n;
ba1261bc 1132
751bc6ac
LP
1133 n = strdup(c);
1134 if (!n)
ba1261bc 1135 return -ENOMEM;
ba1261bc 1136
820fe745 1137 *ret_cgroup = n;
751bc6ac 1138 }
ba1261bc
LP
1139
1140 return 0;
1141}
1142
820fe745 1143int cg_path_decode_unit(const char *cgroup, char **ret_unit) {
8b0849e9
LP
1144 char *c, *s;
1145 size_t n;
ef1673d1
MT
1146
1147 assert(cgroup);
820fe745 1148 assert(ret_unit);
ef1673d1 1149
8b0849e9
LP
1150 n = strcspn(cgroup, "/");
1151 if (n < 3)
1152 return -ENXIO;
1153
2f82562b 1154 c = strndupa_safe(cgroup, n);
ae018d9b 1155 c = cg_unescape(c);
ef1673d1 1156
7410616c 1157 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
cfeaa44a 1158 return -ENXIO;
ef1673d1 1159
d7bd3de0 1160 s = strdup(c);
6c03089c
LP
1161 if (!s)
1162 return -ENOMEM;
1163
820fe745 1164 *ret_unit = s;
ef1673d1
MT
1165 return 0;
1166}
1167
8b0849e9
LP
1168static bool valid_slice_name(const char *p, size_t n) {
1169
1170 if (!p)
1171 return false;
1172
fbd0b64f 1173 if (n < STRLEN("x.slice"))
8b0849e9
LP
1174 return false;
1175
1176 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1177 char buf[n+1], *c;
1178
1179 memcpy(buf, p, n);
1180 buf[n] = 0;
1181
1182 c = cg_unescape(buf);
1183
7410616c 1184 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
8b0849e9
LP
1185 }
1186
1187 return false;
1188}
1189
/* Skips over all leading slice components of the path. Returns a pointer to the first path component
 * (with any preceding slashes already skipped) that is not a valid slice name; this may point to the
 * terminating NUL byte. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t len;

                component = p + strspn(p, "/");
                len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                p = component + len;
        }
}
1207
8b0849e9 1208int cg_path_get_unit(const char *path, char **ret) {
fe96c0f8 1209 _cleanup_free_ char *unit = NULL;
6c03089c 1210 const char *e;
8b0849e9 1211 int r;
6c03089c
LP
1212
1213 assert(path);
8b0849e9 1214 assert(ret);
6c03089c 1215
9444b1f2 1216 e = skip_slices(path);
6c03089c 1217
8b0849e9
LP
1218 r = cg_path_decode_unit(e, &unit);
1219 if (r < 0)
1220 return r;
1221
1222 /* We skipped over the slices, don't accept any now */
fe96c0f8 1223 if (endswith(unit, ".slice"))
8b0849e9 1224 return -ENXIO;
8b0849e9 1225
fe96c0f8 1226 *ret = TAKE_PTR(unit);
8b0849e9 1227 return 0;
6c03089c
LP
1228}
1229
ee164216
QD
1230int cg_path_get_unit_path(const char *path, char **ret) {
1231 _cleanup_free_ char *path_copy = NULL;
1232 char *unit_name;
1233
1234 assert(path);
1235 assert(ret);
1236
1237 path_copy = strdup(path);
1238 if (!path_copy)
1239 return -ENOMEM;
1240
1241 unit_name = (char *)skip_slices(path_copy);
1242 unit_name[strcspn(unit_name, "/")] = 0;
1243
1244 if (!unit_name_is_valid(cg_unescape(unit_name), UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1245 return -ENXIO;
1246
1247 *ret = TAKE_PTR(path_copy);
1248
1249 return 0;
1250}
1251
820fe745 1252int cg_pid_get_unit(pid_t pid, char **ret_unit) {
7fd1b19b 1253 _cleanup_free_ char *cgroup = NULL;
ba1261bc 1254 int r;
ba1261bc 1255
820fe745 1256 assert(ret_unit);
ef1673d1 1257
7027ff61 1258 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
ef1673d1
MT
1259 if (r < 0)
1260 return r;
1261
820fe745 1262 return cg_path_get_unit(cgroup, ret_unit);
6c03089c 1263}
ef1673d1 1264
2c302e89
MY
1265int cg_pidref_get_unit(const PidRef *pidref, char **ret) {
1266 _cleanup_free_ char *unit = NULL;
1267 int r;
1268
1269 assert(ret);
1270
1271 if (!pidref_is_set(pidref))
1272 return -ESRCH;
1273
1274 r = cg_pid_get_unit(pidref->pid, &unit);
1275 if (r < 0)
1276 return r;
1277
1278 r = pidref_verify(pidref);
1279 if (r < 0)
1280 return r;
1281
1282 *ret = TAKE_PTR(unit);
1283 return 0;
1284}
1285
d4fffc4b
ZJS
/**
 * Skips a leading "session-<id>.scope" path component, and requires it to be there. Returns a pointer
 * to whatever follows that component (with slashes skipped), or NULL if the path is empty, the first
 * component does not have this form, or the embedded session ID is not valid.
 */
static const char *skip_session(const char *p) {
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");
        len = strcspn(p, "/");

        if (len < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + len - 6, ".scope", 6) == 0) {
                char id[len - 8 - 6 + 1];
                const char *next;

                /* Extract the part between the "session-" prefix and the ".scope" suffix */
                memcpy(id, p + 8, len - 8 - 6);
                id[len - 8 - 6] = 0;

                /* Session scopes never need unescaping, since they cannot conflict with the kernel's
                 * own names, hence no cg_unescape() call here. */

                if (!session_id_valid(id))
                        return NULL;

                next = p + len;
                return next + strspn(next, "/");
        }

        return NULL;
}
1322
/**
 * Skips a leading "user@<uid>.service" path component, and requires it to be there. Returns a pointer
 * to whatever follows that component (with slashes skipped), or NULL if the path is empty, the first
 * component does not have this form, or the embedded UID cannot be parsed.
 */
static const char *skip_user_manager(const char *p) {
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");
        len = strcspn(p, "/");

        if (len < STRLEN("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) == 0 && memcmp(p + len - 8, ".service", 8) == 0) {
                char uid[len - 5 - 8 + 1];
                const char *next;

                /* Extract the part between the "user@" prefix and the ".service" suffix */
                memcpy(uid, p + 5, len - 5 - 8);
                uid[len - 5 - 8] = 0;

                /* User manager services never need unescaping, since they cannot conflict with the
                 * kernel's own names, hence no cg_unescape() call here. */

                if (parse_uid(uid, NULL) < 0)
                        return NULL;

                next = p + len;
                return next + strspn(next, "/");
        }

        return NULL;
}
1360
/* Skips the part of a cgroup path that leads to a user's own cgroup subtree: any leading slices,
 * followed by either a user manager service component or a session scope component. Returns a pointer
 * to the remainder, or NULL if neither of the two is present. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* First, get rid of the slices, if there are any */
        after_slices = skip_slices(path);

        /* Then try the user manager; if that is not there, fall back to the user session */
        after_manager = skip_user_manager(after_slices);
        return after_manager ? after_manager : skip_session(after_slices);
}
32081481 1377
329ac4bc
LP
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO if the path does not carry a user
 * manager or session prefix, otherwise whatever cg_path_get_unit() returns for the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *remainder;

        assert(path);
        assert(ret);

        remainder = skip_user_prefix(path);

        /* Past the prefix things look just like for a system unit, hence the same parser is used */
        return remainder ? cg_path_get_unit(remainder, ret) : -ENXIO;
}
ba1261bc 1392
820fe745 1393int cg_pid_get_user_unit(pid_t pid, char **ret_unit) {
7fd1b19b 1394 _cleanup_free_ char *cgroup = NULL;
6c03089c
LP
1395 int r;
1396
820fe745 1397 assert(ret_unit);
6c03089c 1398
7027ff61 1399 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
6c03089c
LP
1400 if (r < 0)
1401 return r;
1402
820fe745 1403 return cg_path_get_user_unit(cgroup, ret_unit);
ba1261bc 1404}
e884315e 1405
820fe745 1406int cg_path_get_machine_name(const char *path, char **ret_machine) {
efdb0237
LP
1407 _cleanup_free_ char *u = NULL;
1408 const char *sl;
89f7c846 1409 int r;
374ec6ab 1410
89f7c846
LP
1411 r = cg_path_get_unit(path, &u);
1412 if (r < 0)
1413 return r;
7027ff61 1414
efdb0237 1415 sl = strjoina("/run/systemd/machines/unit:", u);
820fe745 1416 return readlink_malloc(sl, ret_machine);
7027ff61
LP
1417}
1418
820fe745 1419int cg_pid_get_machine_name(pid_t pid, char **ret_machine) {
7fd1b19b 1420 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1421 int r;
1422
820fe745 1423 assert(ret_machine);
7027ff61
LP
1424
1425 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1426 if (r < 0)
1427 return r;
1428
820fe745 1429 return cg_path_get_machine_name(cgroup, ret_machine);
7027ff61
LP
1430}
1431
535e3dd0 1432int cg_path_get_cgroupid(const char *path, uint64_t *ret) {
a5edf95e 1433 cg_file_handle fh = CG_FILE_HANDLE_INIT;
535e3dd0
ILG
1434 int mnt_id = -1;
1435
1436 assert(path);
1437 assert(ret);
1438
535e3dd0
ILG
1439 /* This is cgroupfs so we know the size of the handle, thus no need to loop around like
1440 * name_to_handle_at_loop() does in mountpoint-util.c */
a5edf95e 1441 if (name_to_handle_at(AT_FDCWD, path, &fh.file_handle, &mnt_id, 0) < 0)
535e3dd0
ILG
1442 return -errno;
1443
a5edf95e 1444 *ret = CG_FILE_HANDLE_CGROUPID(fh);
535e3dd0
ILG
1445 return 0;
1446}
1447
820fe745 1448int cg_path_get_session(const char *path, char **ret_session) {
8b0849e9
LP
1449 _cleanup_free_ char *unit = NULL;
1450 char *start, *end;
1451 int r;
7027ff61
LP
1452
1453 assert(path);
7027ff61 1454
8b0849e9
LP
1455 r = cg_path_get_unit(path, &unit);
1456 if (r < 0)
1457 return r;
7027ff61 1458
8b0849e9
LP
1459 start = startswith(unit, "session-");
1460 if (!start)
cfeaa44a 1461 return -ENXIO;
8b0849e9
LP
1462 end = endswith(start, ".scope");
1463 if (!end)
cfeaa44a 1464 return -ENXIO;
8b0849e9
LP
1465
1466 *end = 0;
1467 if (!session_id_valid(start))
cfeaa44a 1468 return -ENXIO;
374ec6ab 1469
820fe745 1470 if (ret_session) {
8b0849e9 1471 char *rr;
af08d2f9 1472
8b0849e9
LP
1473 rr = strdup(start);
1474 if (!rr)
af08d2f9
LP
1475 return -ENOMEM;
1476
820fe745 1477 *ret_session = rr;
af08d2f9 1478 }
7027ff61 1479
7027ff61
LP
1480 return 0;
1481}
1482
820fe745 1483int cg_pid_get_session(pid_t pid, char **ret_session) {
7fd1b19b 1484 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1485 int r;
1486
7027ff61
LP
1487 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1488 if (r < 0)
1489 return r;
1490
820fe745 1491 return cg_path_get_session(cgroup, ret_session);
7027ff61
LP
1492}
1493
820fe745 1494int cg_path_get_owner_uid(const char *path, uid_t *ret_uid) {
374ec6ab 1495 _cleanup_free_ char *slice = NULL;
8b0849e9 1496 char *start, *end;
374ec6ab 1497 int r;
ae018d9b
LP
1498
1499 assert(path);
ae018d9b 1500
374ec6ab
LP
1501 r = cg_path_get_slice(path, &slice);
1502 if (r < 0)
1503 return r;
ae018d9b 1504
674eb685
LP
1505 start = startswith(slice, "user-");
1506 if (!start)
cfeaa44a 1507 return -ENXIO;
820fe745 1508
8b0849e9 1509 end = endswith(start, ".slice");
674eb685 1510 if (!end)
cfeaa44a 1511 return -ENXIO;
ae018d9b 1512
8b0849e9 1513 *end = 0;
820fe745 1514 if (parse_uid(start, ret_uid) < 0)
cfeaa44a 1515 return -ENXIO;
674eb685 1516
674eb685 1517 return 0;
ae018d9b
LP
1518}
1519
820fe745 1520int cg_pid_get_owner_uid(pid_t pid, uid_t *ret_uid) {
ae018d9b
LP
1521 _cleanup_free_ char *cgroup = NULL;
1522 int r;
1523
ae018d9b
LP
1524 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1525 if (r < 0)
1526 return r;
1527
820fe745 1528 return cg_path_get_owner_uid(cgroup, ret_uid);
ae018d9b
LP
1529}
1530
820fe745 1531int cg_path_get_slice(const char *p, char **ret_slice) {
1021b21b 1532 const char *e = NULL;
1021b21b
LP
1533
1534 assert(p);
820fe745 1535 assert(ret_slice);
1021b21b 1536
329ac4bc
LP
1537 /* Finds the right-most slice unit from the beginning, but
1538 * stops before we come to the first non-slice unit. */
1539
1021b21b
LP
1540 for (;;) {
1541 size_t n;
1542
1543 p += strspn(p, "/");
1544
1545 n = strcspn(p, "/");
8b0849e9 1546 if (!valid_slice_name(p, n)) {
1021b21b 1547
8b0849e9
LP
1548 if (!e) {
1549 char *s;
1021b21b 1550
e5d855d3 1551 s = strdup(SPECIAL_ROOT_SLICE);
8b0849e9
LP
1552 if (!s)
1553 return -ENOMEM;
1021b21b 1554
820fe745 1555 *ret_slice = s;
8b0849e9
LP
1556 return 0;
1557 }
1558
820fe745 1559 return cg_path_decode_unit(e, ret_slice);
1021b21b
LP
1560 }
1561
1562 e = p;
1021b21b
LP
1563 p += n;
1564 }
1565}
1566
820fe745 1567int cg_pid_get_slice(pid_t pid, char **ret_slice) {
1021b21b
LP
1568 _cleanup_free_ char *cgroup = NULL;
1569 int r;
1570
820fe745 1571 assert(ret_slice);
1021b21b
LP
1572
1573 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1574 if (r < 0)
1575 return r;
1576
820fe745 1577 return cg_path_get_slice(cgroup, ret_slice);
1021b21b
LP
1578}
1579
/* Determines the user slice a cgroup path belongs to. Returns -ENXIO if the path does not carry a user
 * manager or session prefix, otherwise whatever cg_path_get_slice() returns for the remainder. */
int cg_path_get_user_slice(const char *p, char **ret_slice) {
        const char *remainder;

        assert(p);
        assert(ret_slice);

        remainder = skip_user_prefix(p);

        /* Past the prefix things look just like for a system slice, hence the same parser is used */
        return remainder ? cg_path_get_slice(remainder, ret_slice) : -ENXIO;
}
1593
820fe745 1594int cg_pid_get_user_slice(pid_t pid, char **ret_slice) {
329ac4bc
LP
1595 _cleanup_free_ char *cgroup = NULL;
1596 int r;
1597
820fe745 1598 assert(ret_slice);
329ac4bc
LP
1599
1600 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1601 if (r < 0)
1602 return r;
1603
820fe745 1604 return cg_path_get_user_slice(cgroup, ret_slice);
329ac4bc
LP
1605}
1606
1a56b0c0 1607bool cg_needs_escape(const char *p) {
ae018d9b 1608
1a56b0c0
LP
1609 /* Checks if the specified path is a valid cgroup name by our rules, or if it must be escaped. Note
1610 * that we consider escaped cgroup names invalid here, as they need to be escaped a second time if
1611 * they shall be used. Also note that various names cannot be made valid by escaping even if we
1612 * return true here (because too long, or contain the forbidden character "/"). */
ae018d9b 1613
1a56b0c0
LP
1614 if (!filename_is_valid(p))
1615 return true;
efdb0237 1616
1a56b0c0
LP
1617 if (IN_SET(p[0], '_', '.'))
1618 return true;
ae018d9b 1619
1a56b0c0
LP
1620 if (STR_IN_SET(p, "notify_on_release", "release_agent", "tasks"))
1621 return true;
ae018d9b 1622
1a56b0c0
LP
1623 if (startswith(p, "cgroup."))
1624 return true;
efdb0237 1625
1a56b0c0
LP
1626 for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1627 const char *q;
1628
1629 q = startswith(p, cgroup_controller_to_string(c));
1630 if (!q)
1631 continue;
1632
1633 if (q[0] == '.')
1634 return true;
ae018d9b
LP
1635 }
1636
1a56b0c0
LP
1637 return false;
1638}
efdb0237 1639
1a56b0c0
LP
1640int cg_escape(const char *p, char **ret) {
1641 _cleanup_free_ char *n = NULL;
1642
1643 /* This implements very minimal escaping for names to be used as file names in the cgroup tree: any
1644 * name which might conflict with a kernel name or is prefixed with '_' is prefixed with a '_'. That
1645 * way, when reading cgroup names it is sufficient to remove a single prefixing underscore if there
1646 * is one. */
1647
1648 /* The return value of this function (unlike cg_unescape()) needs free()! */
1649
1650 if (cg_needs_escape(p)) {
1651 n = strjoin("_", p);
1652 if (!n)
1653 return -ENOMEM;
1654
1655 if (!filename_is_valid(n)) /* became invalid due to the prefixing? Or contained things like a slash that cannot be fixed by prefixing? */
1656 return -EINVAL;
1657 } else {
1658 n = strdup(p);
1659 if (!n)
1660 return -ENOMEM;
1661 }
1662
1663 *ret = TAKE_PTR(n);
1664 return 0;
ae018d9b
LP
1665}
1666
char *cg_unescape(const char *p) {
        assert(p);

        /* Reverses cg_escape(): skips a single leading underscore, if there is one. The result points
         * into the string passed in, hence (unlike the result of cg_escape()) it must not be
         * free()d! */

        return (char*) (p[0] == '_' ? p + 1 : p);
}
78edb35a
LP
1678
1679#define CONTROLLER_VALID \
4b549144 1680 DIGITS LETTERS \
78edb35a
LP
1681 "_"
1682
185a0874 1683bool cg_controller_is_valid(const char *p) {
78edb35a
LP
1684 const char *t, *s;
1685
1686 if (!p)
1687 return false;
1688
b6629c4b
TH
1689 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1690 return true;
1691
185a0874
DJL
1692 s = startswith(p, "name=");
1693 if (s)
1694 p = s;
78edb35a 1695
4c701096 1696 if (IN_SET(*p, 0, '_'))
78edb35a
LP
1697 return false;
1698
1699 for (t = p; *t; t++)
1700 if (!strchr(CONTROLLER_VALID, *t))
1701 return false;
1702
8ca94009 1703 if (t - p > NAME_MAX)
78edb35a
LP
1704 return false;
1705
1706 return true;
1707}
a016b922
LP
1708
1709int cg_slice_to_path(const char *unit, char **ret) {
1710 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1711 const char *dash;
7410616c 1712 int r;
a016b922
LP
1713
1714 assert(unit);
1715 assert(ret);
1716
e5d855d3 1717 if (streq(unit, SPECIAL_ROOT_SLICE)) {
c96cc582
LP
1718 char *x;
1719
1720 x = strdup("");
1721 if (!x)
1722 return -ENOMEM;
1723 *ret = x;
1724 return 0;
1725 }
1726
7410616c 1727 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
a016b922
LP
1728 return -EINVAL;
1729
1730 if (!endswith(unit, ".slice"))
1731 return -EINVAL;
1732
7410616c
LP
1733 r = unit_name_to_prefix(unit, &p);
1734 if (r < 0)
1735 return r;
a016b922
LP
1736
1737 dash = strchr(p, '-');
e66e5b61
LP
1738
1739 /* Don't allow initial dashes */
1740 if (dash == p)
1741 return -EINVAL;
1742
a016b922
LP
1743 while (dash) {
1744 _cleanup_free_ char *escaped = NULL;
1745 char n[dash - p + sizeof(".slice")];
1746
989290db 1747#if HAS_FEATURE_MEMORY_SANITIZER
1c56d501 1748 /* msan doesn't instrument stpncpy, so it thinks
5238e957 1749 * n is later used uninitialized:
1c56d501
ZJS
1750 * https://github.com/google/sanitizers/issues/926
1751 */
1752 zero(n);
1753#endif
1754
e66e5b61 1755 /* Don't allow trailing or double dashes */
4c701096 1756 if (IN_SET(dash[1], 0, '-'))
c96cc582 1757 return -EINVAL;
a016b922 1758
c96cc582 1759 strcpy(stpncpy(n, p, dash - p), ".slice");
7410616c 1760 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
a016b922
LP
1761 return -EINVAL;
1762
1a56b0c0
LP
1763 r = cg_escape(n, &escaped);
1764 if (r < 0)
1765 return r;
a016b922 1766
c2bc710b 1767 if (!strextend(&s, escaped, "/"))
a016b922
LP
1768 return -ENOMEM;
1769
1770 dash = strchr(dash+1, '-');
1771 }
1772
1a56b0c0
LP
1773 r = cg_escape(unit, &e);
1774 if (r < 0)
1775 return r;
a016b922 1776
c2bc710b 1777 if (!strextend(&s, e))
a016b922
LP
1778 return -ENOMEM;
1779
ae2a15bc 1780 *ret = TAKE_PTR(s);
a016b922
LP
1781 return 0;
1782}
4ad49000 1783
bd1791b5 1784int cg_is_threaded(const char *path) {
084e7706
YW
1785 _cleanup_free_ char *fs = NULL, *contents = NULL;
1786 _cleanup_strv_free_ char **v = NULL;
1787 int r;
1788
bd1791b5 1789 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.type", &fs);
084e7706
YW
1790 if (r < 0)
1791 return r;
1792
1793 r = read_full_virtual_file(fs, &contents, NULL);
1794 if (r == -ENOENT)
1795 return false; /* Assume no. */
1796 if (r < 0)
1797 return r;
1798
1799 v = strv_split(contents, NULL);
1800 if (!v)
1801 return -ENOMEM;
1802
1803 /* If the cgroup is in the threaded mode, it contains "threaded".
1804 * If one of the parents or siblings is in the threaded mode, it may contain "invalid". */
1805 return strv_contains(v, "threaded") || strv_contains(v, "invalid");
1806}
1807
4ad49000
LP
1808int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1809 _cleanup_free_ char *p = NULL;
1810 int r;
1811
1812 r = cg_get_path(controller, path, attribute, &p);
1813 if (r < 0)
1814 return r;
1815
604028de 1816 return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
4ad49000
LP
1817}
1818
934277fe
LP
1819int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1820 _cleanup_free_ char *p = NULL;
1821 int r;
1822
1823 r = cg_get_path(controller, path, attribute, &p);
1824 if (r < 0)
1825 return r;
1826
1827 return read_one_line_file(p, ret);
1828}
1829
613328c3
AZ
1830int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret) {
1831 _cleanup_free_ char *value = NULL;
1832 uint64_t v;
1833 int r;
1834
1835 assert(ret);
1836
1837 r = cg_get_attribute(controller, path, attribute, &value);
1838 if (r == -ENOENT)
1839 return -ENODATA;
1840 if (r < 0)
1841 return r;
1842
1843 if (streq(value, "max")) {
1844 *ret = CGROUP_LIMIT_MAX;
1845 return 0;
1846 }
1847
1848 r = safe_atou64(value, &v);
1849 if (r < 0)
1850 return r;
1851
1852 *ret = v;
1853 return 0;
1854}
1855
b41dcc51
AZ
1856int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret) {
1857 _cleanup_free_ char *value = NULL;
1858 int r;
1859
1860 assert(ret);
1861
1862 r = cg_get_attribute(controller, path, attribute, &value);
1863 if (r == -ENOENT)
1864 return -ENODATA;
1865 if (r < 0)
1866 return r;
1867
1868 r = parse_boolean(value);
1869 if (r < 0)
1870 return r;
1871
1872 *ret = r;
1873 return 0;
1874}
1875
bd1791b5 1876int cg_get_owner(const char *path, uid_t *ret_uid) {
59331b8e
AZ
1877 _cleanup_free_ char *f = NULL;
1878 struct stat stats;
1879 int r;
1880
1881 assert(ret_uid);
1882
bd1791b5 1883 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &f);
59331b8e
AZ
1884 if (r < 0)
1885 return r;
1886
b30da1c6 1887 if (stat(f, &stats) < 0)
59331b8e
AZ
1888 return -errno;
1889
b30da1c6
LP
1890 r = stat_verify_directory(&stats);
1891 if (r < 0)
1892 return r;
1893
59331b8e
AZ
1894 *ret_uid = stats.st_uid;
1895 return 0;
1896}
1897
25a1f04c 1898int cg_get_keyed_attribute_full(
b734a4ff
LP
1899 const char *controller,
1900 const char *path,
1901 const char *attribute,
1902 char **keys,
25a1f04c
MS
1903 char **ret_values,
1904 CGroupKeyMode mode) {
66ebf6c0 1905
b734a4ff 1906 _cleanup_free_ char *filename = NULL, *contents = NULL;
b734a4ff 1907 const char *p;
9177fa9f 1908 size_t n, i, n_done = 0;
b734a4ff
LP
1909 char **v;
1910 int r;
1911
4e1dfa45 1912 /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
b734a4ff
LP
1913 * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
1914 * entries as 'keys'. On success each entry will be set to the value of the matching key.
1915 *
d9e45bc3
MS
1916 * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. If mode
1917 * is set to GG_KEY_MODE_GRACEFUL we ignore missing keys and return those that were parsed successfully. */
66ebf6c0
TH
1918
1919 r = cg_get_path(controller, path, attribute, &filename);
1920 if (r < 0)
1921 return r;
1922
b734a4ff 1923 r = read_full_file(filename, &contents, NULL);
66ebf6c0
TH
1924 if (r < 0)
1925 return r;
1926
b734a4ff
LP
1927 n = strv_length(keys);
1928 if (n == 0) /* No keys to retrieve? That's easy, we are done then */
1929 return 0;
66ebf6c0 1930
b734a4ff
LP
1931 /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
1932 v = newa0(char*, n);
66ebf6c0 1933
b734a4ff
LP
1934 for (p = contents; *p;) {
1935 const char *w = NULL;
b734a4ff 1936
9177fa9f
ZJS
1937 for (i = 0; i < n; i++)
1938 if (!v[i]) {
b734a4ff
LP
1939 w = first_word(p, keys[i]);
1940 if (w)
1941 break;
66ebf6c0 1942 }
66ebf6c0 1943
b734a4ff 1944 if (w) {
b734a4ff
LP
1945 size_t l;
1946
1947 l = strcspn(w, NEWLINE);
9177fa9f
ZJS
1948 v[i] = strndup(w, l);
1949 if (!v[i]) {
b734a4ff
LP
1950 r = -ENOMEM;
1951 goto fail;
66ebf6c0 1952 }
b734a4ff 1953
b734a4ff 1954 n_done++;
b734a4ff
LP
1955 if (n_done >= n)
1956 goto done;
1957
1958 p = w + l;
9177fa9f 1959 } else
b734a4ff 1960 p += strcspn(p, NEWLINE);
b734a4ff
LP
1961
1962 p += strspn(p, NEWLINE);
66ebf6c0
TH
1963 }
1964
25a1f04c
MS
1965 if (mode & CG_KEY_MODE_GRACEFUL)
1966 goto done;
d9e45bc3
MS
1967
1968 r = -ENXIO;
b734a4ff
LP
1969
1970fail:
24ae45cb 1971 free_many_charp(v, n);
b734a4ff
LP
1972 return r;
1973
1974done:
1975 memcpy(ret_values, v, sizeof(char*) * n);
25a1f04c
MS
1976 if (mode & CG_KEY_MODE_GRACEFUL)
1977 return n_done;
1978
66ebf6c0 1979 return 0;
4ad49000
LP
1980}
1981
aae7e17f 1982int cg_mask_to_string(CGroupMask mask, char **ret) {
ec635a2d 1983 _cleanup_free_ char *s = NULL;
ec635a2d 1984 bool space = false;
aae7e17f 1985 CGroupController c;
319a4f4b 1986 size_t n = 0;
aae7e17f
FB
1987
1988 assert(ret);
1989
1990 if (mask == 0) {
1991 *ret = NULL;
1992 return 0;
1993 }
1994
1995 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
ec635a2d
LP
1996 const char *k;
1997 size_t l;
aae7e17f 1998
f99850a0 1999 if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
aae7e17f
FB
2000 continue;
2001
ec635a2d
LP
2002 k = cgroup_controller_to_string(c);
2003 l = strlen(k);
2004
319a4f4b 2005 if (!GREEDY_REALLOC(s, n + space + l + 1))
ec635a2d
LP
2006 return -ENOMEM;
2007
2008 if (space)
2009 s[n] = ' ';
2010 memcpy(s + n + space, k, l);
2011 n += space + l;
2012
2013 space = true;
aae7e17f
FB
2014 }
2015
ec635a2d 2016 assert(s);
aae7e17f 2017
ec635a2d 2018 s[n] = 0;
ae2a15bc 2019 *ret = TAKE_PTR(s);
ec635a2d 2020
aae7e17f
FB
2021 return 0;
2022}
2023
38a90d45
LP
2024int cg_mask_from_string(const char *value, CGroupMask *ret) {
2025 CGroupMask m = 0;
2026
2027 assert(ret);
aae7e17f
FB
2028 assert(value);
2029
2030 for (;;) {
2031 _cleanup_free_ char *n = NULL;
2032 CGroupController v;
2033 int r;
2034
2035 r = extract_first_word(&value, &n, NULL, 0);
2036 if (r < 0)
2037 return r;
2038 if (r == 0)
2039 break;
2040
2041 v = cgroup_controller_from_string(n);
2042 if (v < 0)
2043 continue;
2044
38a90d45 2045 m |= CGROUP_CONTROLLER_TO_MASK(v);
aae7e17f 2046 }
38a90d45
LP
2047
2048 *ret = m;
aae7e17f
FB
2049 return 0;
2050}
2051
0fa7b500 2052int cg_mask_supported_subtree(const char *root, CGroupMask *ret) {
38a90d45 2053 CGroupMask mask;
415fc41c 2054 int r;
efdb0237 2055
67558d15
LP
2056 /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense of and that
2057 * are actually accessible. Only covers real controllers, i.e. not the CGROUP_CONTROLLER_BPF_xyz
2058 * pseudo-controllers. */
4ad49000 2059
b4cccbc1
LP
2060 r = cg_all_unified();
2061 if (r < 0)
2062 return r;
2063 if (r > 0) {
0fa7b500 2064 _cleanup_free_ char *controllers = NULL, *path = NULL;
efdb0237 2065
d51c4fca
YW
2066 /* In the unified hierarchy we can read the supported and accessible controllers from
2067 * the top-level cgroup attribute */
efdb0237 2068
5f4c5fef
LP
2069 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2070 if (r < 0)
2071 return r;
2072
2073 r = read_one_line_file(path, &controllers);
efdb0237
LP
2074 if (r < 0)
2075 return r;
4ad49000 2076
aae7e17f
FB
2077 r = cg_mask_from_string(controllers, &mask);
2078 if (r < 0)
2079 return r;
efdb0237 2080
1fbbb526 2081 /* Mask controllers that are not supported in unified hierarchy. */
03afd780 2082 mask &= CGROUP_MASK_V2;
efdb0237
LP
2083
2084 } else {
2085 CGroupController c;
2086
0fa7b500 2087 /* In the legacy hierarchy, we check which hierarchies are accessible. */
efdb0237 2088
38a90d45 2089 mask = 0;
efdb0237 2090 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
03afd780 2091 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
efdb0237
LP
2092 const char *n;
2093
03afd780
LP
2094 if (!FLAGS_SET(CGROUP_MASK_V1, bit))
2095 continue;
2096
efdb0237 2097 n = cgroup_controller_to_string(c);
0fa7b500 2098 if (controller_is_v1_accessible(root, n) >= 0)
03afd780 2099 mask |= bit;
efdb0237 2100 }
4ad49000
LP
2101 }
2102
efdb0237
LP
2103 *ret = mask;
2104 return 0;
4ad49000 2105}
b12afc8c 2106
0fa7b500
MK
2107int cg_mask_supported(CGroupMask *ret) {
2108 _cleanup_free_ char *root = NULL;
2109 int r;
2110
2111 r = cg_get_root_path(&root);
2112 if (r < 0)
2113 return r;
2114
2115 return cg_mask_supported_subtree(root, ret);
2116}
2117
6925a0de 2118int cg_kernel_controllers(Set **ret) {
594c3835 2119 _cleanup_set_free_ Set *controllers = NULL;
b12afc8c 2120 _cleanup_fclose_ FILE *f = NULL;
b12afc8c
LP
2121 int r;
2122
6925a0de 2123 assert(ret);
b12afc8c 2124
f09e86bc
LS
2125 /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
2126 * and controllers that aren't currently accessible (because not mounted). This does not include "name="
2127 * pseudo-controllers. */
e155a0aa 2128
fdeea3f4
ZJS
2129 r = fopen_unlocked("/proc/cgroups", "re", &f);
2130 if (r == -ENOENT) {
2131 *ret = NULL;
2132 return 0;
b12afc8c 2133 }
fdeea3f4
ZJS
2134 if (r < 0)
2135 return r;
35bbbf85 2136
b12afc8c 2137 /* Ignore the header line */
f5fbe71d 2138 (void) read_line(f, SIZE_MAX, NULL);
b12afc8c
LP
2139
2140 for (;;) {
dccdbf9b 2141 _cleanup_free_ char *controller = NULL;
b12afc8c
LP
2142 int enabled = 0;
2143
b12afc8c
LP
2144 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2145
121b7054
MY
2146 if (ferror(f))
2147 return -errno;
2148
b12afc8c
LP
2149 if (feof(f))
2150 break;
2151
b12afc8c
LP
2152 return -EBADMSG;
2153 }
2154
dccdbf9b 2155 if (!enabled)
b12afc8c 2156 continue;
b12afc8c 2157
dccdbf9b 2158 if (!cg_controller_is_valid(controller))
b12afc8c 2159 return -EBADMSG;
b12afc8c 2160
594c3835 2161 r = set_ensure_consume(&controllers, &string_hash_ops_free, TAKE_PTR(controller));
b12afc8c
LP
2162 if (r < 0)
2163 return r;
2164 }
2165
1cc6c93a 2166 *ret = TAKE_PTR(controllers);
6925a0de 2167
b12afc8c
LP
2168 return 0;
2169}
efdb0237 2170
d4d99bc6
ZJS
2171/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on
2172 * /sys/fs/cgroup/systemd. This unfortunately broke other tools (such as docker) which expected the v1
2173 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts v2 on
2174 * /sys/fs/cgroup/unified and maintains "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility
2175 * with other tools.
f08e9287 2176 *
d4d99bc6
ZJS
2177 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep
2178 * cgroup v2 process management but disable the compat dual layout, we return true on
2179 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and false on cg_hybrid_unified().
f08e9287
TH
2180 */
2181static thread_local bool unified_systemd_v232;
2182
d4d99bc6
ZJS
2183int cg_unified_cached(bool flush) {
2184 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
efdb0237 2185
efdb0237
LP
2186 struct statfs fs;
2187
2188 /* Checks if we support the unified hierarchy. Returns an
2189 * error when the cgroup hierarchies aren't mounted yet or we
2190 * have any other trouble determining if the unified hierarchy
2191 * is supported. */
2192
d4d99bc6
ZJS
2193 if (flush)
2194 unified_cache = CGROUP_UNIFIED_UNKNOWN;
2195 else if (unified_cache >= CGROUP_UNIFIED_NONE)
2196 return unified_cache;
efdb0237
LP
2197
2198 if (statfs("/sys/fs/cgroup/", &fs) < 0)
c028bed1 2199 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
efdb0237 2200
9aa21133
ZJS
2201 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2202 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
5da38d07 2203 unified_cache = CGROUP_UNIFIED_ALL;
9aa21133 2204 } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2977724b 2205 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
f08e9287 2206 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
9aa21133 2207 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2977724b 2208 unified_cache = CGROUP_UNIFIED_SYSTEMD;
f08e9287 2209 unified_systemd_v232 = false;
f08e9287 2210 } else {
2156061f
MG
2211 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) {
2212 if (errno == ENOENT) {
2213 /* Some other software may have set up /sys/fs/cgroup in a configuration we do not recognize. */
2214 log_debug_errno(errno, "Unsupported cgroupsv1 setup detected: name=systemd hierarchy not found.");
2215 return -ENOMEDIUM;
2216 }
9aa21133 2217 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
2156061f 2218 }
5535d8f7
EV
2219
2220 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2221 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2222 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2223 unified_systemd_v232 = true;
2224 } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2225 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2226 unified_cache = CGROUP_UNIFIED_NONE;
2227 } else {
2228 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
9aa21133 2229 (unsigned long long) fs.f_type);
5535d8f7 2230 unified_cache = CGROUP_UNIFIED_NONE;
9aa21133 2231 }
2977724b 2232 }
0bc5f001
DS
2233 } else if (F_TYPE_EQUAL(fs.f_type, SYSFS_MAGIC)) {
2234 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
2235 "No filesystem is currently mounted on /sys/fs/cgroup.");
baaa35ad
ZJS
2236 } else
2237 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
2238 "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2239 (unsigned long long)fs.f_type);
efdb0237 2240
d4d99bc6 2241 return unified_cache;
5da38d07
TH
2242}
2243
c22800e4 2244int cg_unified_controller(const char *controller) {
b4cccbc1 2245 int r;
5da38d07 2246
d4d99bc6 2247 r = cg_unified_cached(false);
b4cccbc1
LP
2248 if (r < 0)
2249 return r;
5da38d07 2250
d4d99bc6 2251 if (r == CGROUP_UNIFIED_NONE)
fc9ae717
LP
2252 return false;
2253
d4d99bc6 2254 if (r >= CGROUP_UNIFIED_ALL)
fc9ae717
LP
2255 return true;
2256
2257 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
5da38d07
TH
2258}
2259
b4cccbc1 2260int cg_all_unified(void) {
4bb652ac
LP
2261 int r;
2262
d4d99bc6 2263 r = cg_unified_cached(false);
4bb652ac
LP
2264 if (r < 0)
2265 return r;
2266
d4d99bc6 2267 return r >= CGROUP_UNIFIED_ALL;
efdb0237
LP
2268}
2269
b4cccbc1
LP
2270int cg_hybrid_unified(void) {
2271 int r;
2977724b 2272
d4d99bc6 2273 r = cg_unified_cached(false);
b4cccbc1
LP
2274 if (r < 0)
2275 return r;
2977724b 2276
d4d99bc6 2277 return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
13c31542
TH
2278}
2279
b426b4ee
NR
/* Checks the delegation xattrs of the cgroup at 'path'.
 *
 * "trusted.delegate" is consulted first; only if that xattr is absent is "user.delegate" read.
 * Returns the value of whichever lookup was decisive (including any error other than "xattr
 * absent"), or false if both xattrs are absent. */
int cg_is_delegated(const char *path) {
        int r;

        assert(path);

        r = cg_get_xattr_bool(path, "trusted.delegate");
        if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
                /* The preferred, trusted xattr isn't set, hence fall back to the untrusted one. The
                 * assumption is that whoever is trusted enough to own the cgroup is also trusted
                 * enough to decide whether it is delegated, so this should be safe. */
                r = cg_get_xattr_bool(path, "user.delegate");
                if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                        return false;
        }

        return r;
}
2295
/* Same check as the path-based variant, but the xattrs are read from an already open file
 * descriptor: "trusted.delegate" first, then "user.delegate" if the former is absent.
 * Returns false if both are absent, otherwise the result of the decisive lookup. */
int cg_is_delegated_fd(int fd) {
        int trusted, untrusted;

        assert(fd >= 0);

        trusted = getxattr_at_bool(fd, /* path= */ NULL, "trusted.delegate", /* flags= */ 0);
        if (!ERRNO_IS_NEG_XATTR_ABSENT(trusted))
                return trusted;

        untrusted = getxattr_at_bool(fd, /* path= */ NULL, "user.delegate", /* flags= */ 0);
        if (ERRNO_IS_NEG_XATTR_ABSENT(untrusted))
                return false;

        return untrusted;
}
2308
6cf96ab4
NR
/* Reads the boolean "user.coredump_receive" xattr of the cgroup at 'path'. An absent xattr is
 * reported as false; any other result of cg_get_xattr_bool() is passed through unchanged. */
int cg_has_coredump_receive(const char *path) {
        int r;

        assert(path);

        r = cg_get_xattr_bool(path, "user.coredump_receive");

        return ERRNO_IS_NEG_XATTR_ABSENT(r) ? false : r;
}
2320
9be57249
TH
2321const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2322 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2323 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
ac06a0cf
TH
2324 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2325 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
9be57249
TH
2326};
2327
2328static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2329 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2330 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
ac06a0cf
TH
2331 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2332 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
9be57249
TH
2333};
2334
2335DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2336
f0bef277
EV
2337bool is_cgroup_fs(const struct statfs *s) {
2338 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2339 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2340}
2341
/* Returns true if the file system that 'fd' lives on is a cgroup file system, as determined by
 * is_cgroup_fs(). Returns false if it is not, or if the file system type cannot be queried. */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        /* The return type is bool, so a negative errno cannot be propagated to the caller: any
         * non-zero value converts to 'true', which would misreport a failed fstatfs() as "this is
         * a cgroup file system". Report failure as "not a cgroup file system" instead. */
        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2350
b82f71c7 2351static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
efdb0237
LP
2352 [CGROUP_CONTROLLER_CPU] = "cpu",
2353 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
047f5d63 2354 [CGROUP_CONTROLLER_CPUSET] = "cpuset",
13c31542 2355 [CGROUP_CONTROLLER_IO] = "io",
efdb0237
LP
2356 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2357 [CGROUP_CONTROLLER_MEMORY] = "memory",
3905f127 2358 [CGROUP_CONTROLLER_DEVICES] = "devices",
03a7b521 2359 [CGROUP_CONTROLLER_PIDS] = "pids",
17f14955 2360 [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
084c7007 2361 [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
506ea51b 2362 [CGROUP_CONTROLLER_BPF_FOREIGN] = "bpf-foreign",
a8e5eb17 2363 [CGROUP_CONTROLLER_BPF_SOCKET_BIND] = "bpf-socket-bind",
6f50d4f7 2364 [CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES] = "bpf-restrict-network-interfaces",
efdb0237
LP
2365};
2366
2367DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
f98c2585
CD
2368
2369CGroupMask get_cpu_accounting_mask(void) {
2370 static CGroupMask needed_mask = (CGroupMask) -1;
2371
2372 /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
2373 * provided externally from the CPU controller, which means we don't
2374 * need to enable the CPU controller just to get metrics. This is good,
2375 * because enabling the CPU controller comes at a minor performance
2376 * hit, especially when it's propagated deep into large hierarchies.
2377 * There's also no separate CPU accounting controller available within
2378 * a unified hierarchy.
2379 *
2380 * This combination of factors results in the desired cgroup mask to
2381 * enable for CPU accounting varying as follows:
2382 *
2383 * ╔═════════════════════╤═════════════════════╗
2384 * ║ Linux ≥4.15 │ Linux <4.15 ║
2385 * ╔═══════════════╬═════════════════════╪═════════════════════╣
2386 * ║ Unified ║ nothing │ CGROUP_MASK_CPU ║
2387 * ╟───────────────╫─────────────────────┼─────────────────────╢
2388 * ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
2389 * ╚═══════════════╩═════════════════════╧═════════════════════╝
2390 *
2391 * We check kernel version here instead of manually checking whether
2392 * cpu.stat is present for every cgroup, as that check in itself would
2393 * already be fairly expensive.
2394 *
2395 * Kernels where this patch has been backported will therefore have the
2396 * CPU controller enabled unnecessarily. This is more expensive than
2397 * necessary, but harmless. ☺️
2398 */
2399
2400 if (needed_mask == (CGroupMask) -1) {
2401 if (cg_all_unified()) {
2402 struct utsname u;
2403 assert_se(uname(&u) >= 0);
2404
8087644a 2405 if (strverscmp_improved(u.release, "4.15") < 0)
f98c2585
CD
2406 needed_mask = CGROUP_MASK_CPU;
2407 else
2408 needed_mask = 0;
2409 } else
2410 needed_mask = CGROUP_MASK_CPUACCT;
2411 }
2412
2413 return needed_mask;
2414}
2415
2416bool cpu_accounting_is_cheap(void) {
2417 return get_cpu_accounting_mask() == 0;
2418}
4d824a4e
AZ
2419
2420static const char* const managed_oom_mode_table[_MANAGED_OOM_MODE_MAX] = {
2421 [MANAGED_OOM_AUTO] = "auto",
2422 [MANAGED_OOM_KILL] = "kill",
2423};
2424
2425DEFINE_STRING_TABLE_LOOKUP(managed_oom_mode, ManagedOOMMode);
242d75bd
AZ
2426
2427static const char* const managed_oom_preference_table[_MANAGED_OOM_PREFERENCE_MAX] = {
2428 [MANAGED_OOM_PREFERENCE_NONE] = "none",
2429 [MANAGED_OOM_PREFERENCE_AVOID] = "avoid",
2430 [MANAGED_OOM_PREFERENCE_OMIT] = "omit",
2431};
2432
2433DEFINE_STRING_TABLE_LOOKUP(managed_oom_preference, ManagedOOMPreference);