]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/cgroup-util.c
nspawn, vmspawn, run0: add env var for turning off background tinting
[thirdparty/systemd.git] / src / basic / cgroup-util.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
8c6db833
LP
2
3#include <errno.h>
11c3a366 4#include <limits.h>
8c6db833 5#include <signal.h>
11c3a366 6#include <stddef.h>
8c6db833 7#include <stdlib.h>
672c48cc 8#include <sys/types.h>
f98c2585 9#include <sys/utsname.h>
4b58153d 10#include <sys/xattr.h>
84ac7bea 11#include <unistd.h>
8c6db833 12
b5efdb8a 13#include "alloc-util.h"
3ffd4af2 14#include "cgroup-util.h"
28db6fbf 15#include "constants.h"
a0956174 16#include "dirent-util.h"
84ac7bea 17#include "extract-word.h"
3ffd4af2 18#include "fd-util.h"
84ac7bea 19#include "fileio.h"
f97b34a6 20#include "format-util.h"
f4f15635 21#include "fs-util.h"
93cc7779 22#include "log.h"
84ac7bea
LP
23#include "login-util.h"
24#include "macro.h"
3b287495 25#include "missing_fs.h"
f5947a5e 26#include "missing_magic.h"
5545f336 27#include "missing_threads.h"
84ac7bea 28#include "mkdir.h"
6bedfcbb 29#include "parse-util.h"
9eb977db 30#include "path-util.h"
84ac7bea
LP
31#include "process-util.h"
32#include "set.h"
9444b1f2 33#include "special.h"
872a590e 34#include "stat-util.h"
d054f0a4 35#include "stdio-util.h"
8b43440b 36#include "string-table.h"
07630cea 37#include "string-util.h"
aae7e17f 38#include "strv.h"
84ac7bea 39#include "unit-name.h"
b1d4f8e1 40#include "user-util.h"
baa358df 41#include "xattr-util.h"
8c6db833 42
3b287495
LP
43int cg_path_open(const char *controller, const char *path) {
44 _cleanup_free_ char *fs = NULL;
45 int r;
46
47 r = cg_get_path(controller, path, /* item=*/ NULL, &fs);
48 if (r < 0)
49 return r;
50
51 return RET_NERRNO(open(fs, O_DIRECTORY|O_CLOEXEC));
52}
53
54int cg_cgroupid_open(int cgroupfs_fd, uint64_t id) {
55 _cleanup_close_ int fsfd = -EBADF;
56
57 if (cgroupfs_fd < 0) {
58 fsfd = open("/sys/fs/cgroup", O_CLOEXEC|O_DIRECTORY);
59 if (fsfd < 0)
60 return -errno;
61
62 cgroupfs_fd = fsfd;
63 }
64
65 cg_file_handle fh = CG_FILE_HANDLE_INIT;
66 CG_FILE_HANDLE_CGROUPID(fh) = id;
67
68 int fd = open_by_handle_at(cgroupfs_fd, &fh.file_handle, O_DIRECTORY|O_CLOEXEC);
69 if (fd < 0)
70 return -errno;
71
72 return fd;
73}
74
aef43552 75static int cg_enumerate_items(const char *controller, const char *path, FILE **ret, const char *item) {
7027ff61 76 _cleanup_free_ char *fs = NULL;
c6c18be3 77 FILE *f;
7027ff61 78 int r;
c6c18be3 79
aef43552 80 assert(ret);
c6c18be3 81
e48fcfef 82 r = cg_get_path(controller, path, item, &fs);
c3175a7f 83 if (r < 0)
c6c18be3
LP
84 return r;
85
86 f = fopen(fs, "re");
c6c18be3
LP
87 if (!f)
88 return -errno;
89
aef43552 90 *ret = f;
c6c18be3
LP
91 return 0;
92}
93
aef43552
LP
/* Opens the "cgroup.procs" attribute of the specified cgroup, see cg_enumerate_items(). */
int cg_enumerate_processes(const char *controller, const char *path, FILE **ret) {
        static const char attribute[] = "cgroup.procs";

        return cg_enumerate_items(controller, path, ret, attribute);
}
97
aef43552 98int cg_read_pid(FILE *f, pid_t *ret) {
c6c18be3
LP
99 unsigned long ul;
100
aef43552 101 /* Note that the cgroup.procs might contain duplicates! See cgroups.txt for details. */
c6c18be3 102
7027ff61 103 assert(f);
aef43552 104 assert(ret);
7027ff61 105
c6c18be3
LP
106 errno = 0;
107 if (fscanf(f, "%lu", &ul) != 1) {
108
8ab40789
LP
109 if (feof(f)) {
110 *ret = 0;
c6c18be3 111 return 0;
8ab40789 112 }
c6c18be3 113
66855de7 114 return errno_or_else(EIO);
c6c18be3
LP
115 }
116
117 if (ul <= 0)
118 return -EIO;
5ed7cdfb
LP
119 if (ul > PID_T_MAX)
120 return -EIO;
c6c18be3 121
aef43552 122 *ret = (pid_t) ul;
c6c18be3
LP
123 return 1;
124}
125
12c7d27b
LP
126int cg_read_pidref(FILE *f, PidRef *ret) {
127 int r;
128
129 assert(f);
130 assert(ret);
131
132 for (;;) {
133 pid_t pid;
134
135 r = cg_read_pid(f, &pid);
136 if (r < 0)
137 return r;
138 if (r == 0) {
139 *ret = PIDREF_NULL;
140 return 0;
141 }
142
143 r = pidref_set_pid(ret, pid);
144 if (r >= 0)
145 return 1;
146 if (r != -ESRCH)
147 return r;
148
149 /* ESRCH → gone by now? just skip over it, read the next */
150 }
151}
152
8b238b13
LP
153int cg_read_event(
154 const char *controller,
155 const char *path,
156 const char *event,
31a9be23 157 char **ret) {
8b238b13 158
ab2c3861 159 _cleanup_free_ char *events = NULL, *content = NULL;
ab2c3861
TH
160 int r;
161
162 r = cg_get_path(controller, path, "cgroup.events", &events);
163 if (r < 0)
164 return r;
165
627055ce 166 r = read_full_virtual_file(events, &content, NULL);
ab2c3861
TH
167 if (r < 0)
168 return r;
169
31a9be23 170 for (const char *p = content;;) {
8922a728 171 _cleanup_free_ char *line = NULL, *key = NULL;
31a9be23
YW
172 const char *q;
173
174 r = extract_first_word(&p, &line, "\n", 0);
175 if (r < 0)
176 return r;
177 if (r == 0)
178 return -ENOENT;
179
180 q = line;
181 r = extract_first_word(&q, &key, " ", 0);
182 if (r < 0)
183 return r;
184 if (r == 0)
ab2c3861
TH
185 return -EINVAL;
186
31a9be23 187 if (!streq(key, event))
ab2c3861
TH
188 continue;
189
8922a728 190 return strdup_to(ret, q);
ab2c3861 191 }
ab2c3861
TH
192}
193
3228995c
CB
194bool cg_ns_supported(void) {
195 static thread_local int enabled = -1;
196
197 if (enabled >= 0)
198 return enabled;
199
0887fa71
LP
200 if (access("/proc/self/ns/cgroup", F_OK) < 0) {
201 if (errno != ENOENT)
202 log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
203 enabled = false;
204 } else
205 enabled = true;
3228995c
CB
206
207 return enabled;
208}
209
d9e45bc3
MS
210bool cg_freezer_supported(void) {
211 static thread_local int supported = -1;
212
213 if (supported >= 0)
214 return supported;
215
216 supported = cg_all_unified() > 0 && access("/sys/fs/cgroup/init.scope/cgroup.freeze", F_OK) == 0;
217
218 return supported;
219}
220
8a513eee
AB
221bool cg_kill_supported(void) {
222 static thread_local int supported = -1;
223
224 if (supported >= 0)
225 return supported;
226
227 if (cg_all_unified() <= 0)
228 supported = false;
229 else if (access("/sys/fs/cgroup/init.scope/cgroup.kill", F_OK) < 0) {
230 if (errno != ENOENT)
231 log_debug_errno(errno, "Failed to check if cgroup.kill is available, assuming not: %m");
232 supported = false;
233 } else
234 supported = true;
235
236 return supported;
237}
238
aef43552 239int cg_enumerate_subgroups(const char *controller, const char *path, DIR **ret) {
7027ff61 240 _cleanup_free_ char *fs = NULL;
35d2e7ec 241 DIR *d;
aef43552 242 int r;
35d2e7ec 243
aef43552 244 assert(ret);
35d2e7ec
LP
245
246 /* This is not recursive! */
247
c3175a7f
LP
248 r = cg_get_path(controller, path, NULL, &fs);
249 if (r < 0)
35d2e7ec
LP
250 return r;
251
252 d = opendir(fs);
35d2e7ec
LP
253 if (!d)
254 return -errno;
255
aef43552 256 *ret = d;
35d2e7ec
LP
257 return 0;
258}
259
aef43552 260int cg_read_subgroup(DIR *d, char **ret) {
35d2e7ec 261 assert(d);
aef43552 262 assert(ret);
35d2e7ec 263
f01327ad 264 FOREACH_DIRENT_ALL(de, d, return -errno) {
35d2e7ec
LP
265 if (de->d_type != DT_DIR)
266 continue;
267
49bfc877 268 if (dot_or_dot_dot(de->d_name))
35d2e7ec
LP
269 continue;
270
8922a728 271 return strdup_to_full(ret, de->d_name);
35d2e7ec
LP
272 }
273
8ab40789 274 *ret = NULL;
35d2e7ec
LP
275 return 0;
276}
277
4ad49000 278int cg_rmdir(const char *controller, const char *path) {
7027ff61 279 _cleanup_free_ char *p = NULL;
35d2e7ec
LP
280 int r;
281
ad293f5a
LP
282 r = cg_get_path(controller, path, NULL, &p);
283 if (r < 0)
35d2e7ec
LP
284 return r;
285
286 r = rmdir(p);
7027ff61
LP
287 if (r < 0 && errno != ENOENT)
288 return -errno;
35d2e7ec 289
b4cccbc1 290 r = cg_hybrid_unified();
f20db199 291 if (r <= 0)
b4cccbc1 292 return r;
b4cccbc1
LP
293
294 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
2977724b
TH
295 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
296 if (r < 0)
297 log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
298 }
299
7027ff61 300 return 0;
35d2e7ec
LP
301}
302
e48fcfef 303static int cg_kill_items(
1d98fef1
LP
304 const char *path,
305 int sig,
306 CGroupFlags flags,
307 Set *s,
308 cg_kill_log_func_t log_kill,
e48fcfef
TM
309 void *userdata,
310 const char *item) {
1d98fef1 311
7027ff61 312 _cleanup_set_free_ Set *allocated_set = NULL;
35d2e7ec 313 bool done = false;
c53d2d54 314 int r, ret = 0, ret_log_kill = 0;
8c6db833 315
8c6db833
LP
316 assert(sig >= 0);
317
0d5b4810
LP
318 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
319 * SIGCONT on SIGKILL. */
320 if (IN_SET(sig, SIGCONT, SIGKILL))
321 flags &= ~CGROUP_SIGCONT;
322
8c6db833
LP
323 /* This goes through the tasks list and kills them all. This
324 * is repeated until no further processes are added to the
325 * tasks list, to properly handle forking processes */
326
7027ff61 327 if (!s) {
d5099efc 328 s = allocated_set = set_new(NULL);
7027ff61 329 if (!s)
ca949c9d 330 return -ENOMEM;
7027ff61 331 }
8c6db833 332
8c6db833 333 do {
7027ff61 334 _cleanup_fclose_ FILE *f = NULL;
8c6db833
LP
335 done = true;
336
bd1791b5 337 r = cg_enumerate_items(SYSTEMD_CGROUP_CONTROLLER, path, &f, item);
f65a40fb
MY
338 if (r == -ENOENT)
339 break;
340 if (r < 0)
341 return RET_GATHER(ret, r);
c6c18be3 342
4d1b2df1
LP
343 for (;;) {
344 _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
345
346 r = cg_read_pidref(f, &pidref);
347 if (r < 0)
f65a40fb 348 return RET_GATHER(ret, r);
4d1b2df1
LP
349 if (r == 0)
350 break;
8c6db833 351
4d1b2df1 352 if ((flags & CGROUP_IGNORE_SELF) && pidref_is_self(&pidref))
c6c18be3 353 continue;
8c6db833 354
4d1b2df1 355 if (set_get(s, PID_TO_PTR(pidref.pid)) == PID_TO_PTR(pidref.pid))
c6c18be3 356 continue;
8c6db833 357
1d98fef1 358 if (log_kill)
4d1b2df1 359 ret_log_kill = log_kill(&pidref, sig, userdata);
1d98fef1 360
4d1b2df1
LP
361 /* If we haven't killed this process yet, kill it */
362 r = pidref_kill(&pidref, sig);
f65a40fb
MY
363 if (r < 0 && r != -ESRCH)
364 RET_GATHER(ret, r);
365 if (r >= 0) {
1d98fef1 366 if (flags & CGROUP_SIGCONT)
4d1b2df1 367 (void) pidref_kill(&pidref, SIGCONT);
430c18ed 368
c53d2d54
DB
369 if (ret == 0) {
370 if (log_kill)
371 ret = ret_log_kill;
372 else
373 ret = 1;
374 }
430c18ed 375 }
8c6db833 376
8c6db833
LP
377 done = false;
378
2d790175 379 r = set_put(s, PID_TO_PTR(pidref.pid));
f65a40fb
MY
380 if (r < 0)
381 return RET_GATHER(ret, r);
35d2e7ec
LP
382 }
383
f65a40fb
MY
384 /* To avoid racing against processes which fork quicker than we can kill them, we repeat this
385 * until no new pids need to be killed. */
8c6db833 386
35d2e7ec 387 } while (!done);
8c6db833 388
35d2e7ec 389 return ret;
8c6db833
LP
390}
391
e48fcfef 392int cg_kill(
e48fcfef
TM
393 const char *path,
394 int sig,
395 CGroupFlags flags,
396 Set *s,
397 cg_kill_log_func_t log_kill,
398 void *userdata) {
37f0289b 399
400 int r, ret;
e48fcfef 401
bd1791b5 402 r = cg_kill_items(path, sig, flags, s, log_kill, userdata, "cgroup.procs");
e48fcfef
TM
403 if (r < 0 || sig != SIGKILL)
404 return r;
405
37f0289b 406 ret = r;
407
e48fcfef 408 /* Only in case of killing with SIGKILL and when using cgroupsv2, kill remaining threads manually as
cda5ccdb
TM
409 a workaround for kernel bug. It was fixed in 5.2-rc5 (c03cd7738a83), backported to 4.19.66
410 (4340d175b898) and 4.14.138 (feb6b123b7dd). */
bd1791b5 411 r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
37f0289b 412 if (r < 0)
413 return r;
414 if (r == 0)
415 return ret;
416
bd1791b5 417 r = cg_kill_items(path, sig, flags, s, log_kill, userdata, "cgroup.threads");
37f0289b 418 if (r < 0)
e48fcfef 419 return r;
e48fcfef 420
37f0289b 421 return r > 0 || ret > 0;
e48fcfef
TM
422}
423
bd1791b5
LP
424int cg_kill_kernel_sigkill(const char *path) {
425 /* Kills the cgroup at `path` directly by writing to its cgroup.kill file. This sends SIGKILL to all
426 * processes in the cgroup and has the advantage of being completely atomic, unlike cg_kill_items(). */
427
8a513eee 428 _cleanup_free_ char *killfile = NULL;
bd1791b5 429 int r;
8a513eee
AB
430
431 assert(path);
432
433 if (!cg_kill_supported())
434 return -EOPNOTSUPP;
435
bd1791b5 436 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.kill", &killfile);
8a513eee
AB
437 if (r < 0)
438 return r;
439
440 r = write_string_file(killfile, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
441 if (r < 0)
442 return r;
443
444 return 0;
445}
446
1d98fef1 447int cg_kill_recursive(
1d98fef1
LP
448 const char *path,
449 int sig,
450 CGroupFlags flags,
451 Set *s,
452 cg_kill_log_func_t log_kill,
453 void *userdata) {
454
e155a0aa 455 int r, ret;
8c6db833
LP
456
457 assert(path);
8c6db833
LP
458 assert(sig >= 0);
459
8a513eee 460 if (sig == SIGKILL && cg_kill_supported() &&
bd1791b5 461 !FLAGS_SET(flags, CGROUP_IGNORE_SELF) && !s && !log_kill)
8a513eee 462 /* ignore CGROUP_SIGCONT, since this is a no-op alongside SIGKILL */
bd1791b5
LP
463 ret = cg_kill_kernel_sigkill(path);
464 else {
465 _cleanup_set_free_ Set *allocated_set = NULL;
466 _cleanup_closedir_ DIR *d = NULL;
467
8a513eee
AB
468 if (!s) {
469 s = allocated_set = set_new(NULL);
470 if (!s)
471 return -ENOMEM;
472 }
ca949c9d 473
bd1791b5 474 ret = cg_kill(path, sig, flags, s, log_kill, userdata);
8c6db833 475
bd1791b5 476 r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
8a513eee 477 if (r < 0) {
bd1791b5
LP
478 if (r != -ENOENT)
479 RET_GATHER(ret, r);
8c6db833 480
8a513eee
AB
481 return ret;
482 }
8c6db833 483
bd1791b5
LP
484 for (;;) {
485 _cleanup_free_ char *fn = NULL, *p = NULL;
486
487 r = cg_read_subgroup(d, &fn);
488 if (r < 0) {
489 RET_GATHER(ret, r);
490 break;
491 }
492 if (r == 0)
493 break;
8c6db833 494
8a513eee 495 p = path_join(empty_to_root(path), fn);
8a513eee
AB
496 if (!p)
497 return -ENOMEM;
8c6db833 498
bd1791b5 499 r = cg_kill_recursive(p, sig, flags, s, log_kill, userdata);
8a513eee
AB
500 if (r != 0 && ret >= 0)
501 ret = r;
502 }
8c6db833 503 }
35d2e7ec 504
8a513eee 505 if (FLAGS_SET(flags, CGROUP_REMOVE)) {
bd1791b5
LP
506 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER, path);
507 if (!IN_SET(r, -ENOENT, -EBUSY))
508 RET_GATHER(ret, r);
7027ff61 509 }
ca949c9d 510
8c6db833
LP
511 return ret;
512}
513
efdb0237 514static const char *controller_to_dirname(const char *controller) {
7027ff61
LP
515 assert(controller);
516
a561253f
LP
517 /* Converts a controller name to the directory name below /sys/fs/cgroup/ we want to mount it
518 * to. Effectively, this just cuts off the name= prefixed used for named hierarchies, if it is
519 * specified. */
efdb0237 520
2977724b 521 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
b4cccbc1 522 if (cg_hybrid_unified() > 0)
2977724b
TH
523 controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
524 else
525 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
526 }
b6629c4b 527
a561253f 528 return startswith(controller, "name=") ?: controller;
3474ae3c
LP
529}
530
/* Builds "/sys/fs/cgroup/<controller dir>[/<path>][/<suffix>]" for the legacy layout. Empty or NULL
 * path/suffix components are left out. Returns 0 and a newly allocated string in *ret, or -ENOMEM. */
static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **ret) {
        bool have_path, have_suffix;
        const char *dn;
        char *joined;

        assert(ret);
        assert(controller);

        dn = controller_to_dirname(controller);

        have_path = !isempty(path);
        have_suffix = !isempty(suffix);

        if (have_path && have_suffix)
                joined = path_join("/sys/fs/cgroup", dn, path, suffix);
        else if (have_path)
                joined = path_join("/sys/fs/cgroup", dn, path);
        else if (have_suffix)
                joined = path_join("/sys/fs/cgroup", dn, suffix);
        else
                joined = path_join("/sys/fs/cgroup", dn);
        if (!joined)
                return -ENOMEM;

        *ret = joined;
        return 0;
}
554
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]" for the unified layout. Empty or NULL path/suffix components
 * are left out. Returns 0 and a newly allocated string in *ret, or -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **ret) {
        bool have_path, have_suffix;
        char *joined;

        assert(ret);

        have_path = !isempty(path);
        have_suffix = !isempty(suffix);

        if (have_path && have_suffix)
                joined = path_join("/sys/fs/cgroup", path, suffix);
        else if (have_path)
                joined = path_join("/sys/fs/cgroup", path);
        else if (have_suffix)
                joined = path_join("/sys/fs/cgroup", suffix);
        else
                joined = strdup("/sys/fs/cgroup");
        if (!joined)
                return -ENOMEM;

        *ret = joined;
        return 0;
}
574
/* Translates controller + cgroup path + attribute suffix into a file system path, returned as a newly
 * allocated, simplified string in *ret.
 *
 * With a NULL controller only path and suffix are joined, without any /sys/fs/cgroup prefix; in that case
 * at least one of the two must be non-empty (-EINVAL otherwise). With a controller, the unified or the
 * legacy layout is used depending on cg_all_unified().
 *
 * Returns 0 on success, -EINVAL for an invalid controller, -ENOMEM, or the error of cg_all_unified(). */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **ret) {
        int r;

        assert(ret);

        if (!controller) {
                bool have_path = !isempty(path), have_suffix = !isempty(suffix);
                char *joined;

                /* If no controller is specified, we return the path *below* the controllers, without any
                 * prefix. */

                if (!have_path && !have_suffix)
                        return -EINVAL;

                if (!have_suffix)
                        joined = strdup(path);
                else if (!have_path)
                        joined = strdup(suffix);
                else
                        joined = path_join(path, suffix);
                if (!joined)
                        return -ENOMEM;

                *ret = path_simplify(joined);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified();
        if (r < 0)
                return r;

        r = r > 0 ?
                join_path_unified(path, suffix, ret) :
                join_path_legacy(controller, path, suffix, ret);
        if (r < 0)
                return r;

        path_simplify(*ret);
        return 0;
}
dbd821ac 618
0fa7b500
MK
/* Checks accessibility of a legacy controller hierarchy below /sys/fs/cgroup/ via laccess(), whose result
 * is returned as is. Without 'root' only the existence (F_OK) of the controller directory is checked. */
static int controller_is_v1_accessible(const char *root, const char *controller) {
        const char *dirname, *checked;

        assert(controller);

        dirname = controller_to_dirname(controller);

        /* If root is specified, we check that:
         * - possible subcgroup is created at root,
         * - we can modify the hierarchy. */

        checked = strjoina("/sys/fs/cgroup/", dirname, root, root ? "/cgroup.procs" : NULL);
        return laccess(checked, root ? W_OK : F_OK);
}
633
820fe745 634int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **ret) {
37099707 635 int r;
dbd821ac 636
efdb0237 637 assert(controller);
820fe745 638 assert(ret);
70132bd0 639
81504017
MK
640 if (!cg_controller_is_valid(controller))
641 return -EINVAL;
642
643 r = cg_all_unified();
37099707
LP
644 if (r < 0)
645 return r;
81504017
MK
646 if (r > 0) {
647 /* In the unified hierarchy all controllers are considered accessible,
648 * except for the named hierarchies */
649 if (startswith(controller, "name="))
650 return -EOPNOTSUPP;
651 } else {
652 /* Check if the specified controller is actually accessible */
0fa7b500 653 r = controller_is_v1_accessible(NULL, controller);
81504017
MK
654 if (r < 0)
655 return r;
656 }
3474ae3c 657
820fe745 658 return cg_get_path(controller, path, suffix, ret);
8c6db833
LP
659}
660
bd1791b5 661int cg_set_xattr(const char *path, const char *name, const void *value, size_t size, int flags) {
4b58153d
LP
662 _cleanup_free_ char *fs = NULL;
663 int r;
664
665 assert(path);
666 assert(name);
667 assert(value || size <= 0);
668
bd1791b5 669 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
4b58153d
LP
670 if (r < 0)
671 return r;
672
7c248223 673 return RET_NERRNO(setxattr(fs, name, value, size, flags));
4b58153d
LP
674}
675
bd1791b5 676int cg_get_xattr(const char *path, const char *name, void *value, size_t size) {
4b58153d
LP
677 _cleanup_free_ char *fs = NULL;
678 ssize_t n;
679 int r;
680
681 assert(path);
682 assert(name);
683
bd1791b5 684 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
4b58153d
LP
685 if (r < 0)
686 return r;
687
688 n = getxattr(fs, name, value, size);
689 if (n < 0)
690 return -errno;
691
692 return (int) n;
693}
694
bd1791b5 695int cg_get_xattr_malloc(const char *path, const char *name, char **ret) {
baa358df
AZ
696 _cleanup_free_ char *fs = NULL;
697 int r;
698
699 assert(path);
700 assert(name);
701
bd1791b5 702 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
baa358df
AZ
703 if (r < 0)
704 return r;
705
bd1791b5 706 return lgetxattr_malloc(fs, name, ret);
baa358df
AZ
707}
708
bd1791b5 709int cg_get_xattr_bool(const char *path, const char *name) {
f0b8ac9e 710 _cleanup_free_ char *fs = NULL;
59331b8e
AZ
711 int r;
712
713 assert(path);
714 assert(name);
715
f0b8ac9e 716 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
59331b8e
AZ
717 if (r < 0)
718 return r;
719
f0b8ac9e 720 return getxattr_at_bool(AT_FDCWD, fs, name, /* flags= */ 0);
59331b8e
AZ
721}
722
bd1791b5 723int cg_remove_xattr(const char *path, const char *name) {
bf25f165
LP
724 _cleanup_free_ char *fs = NULL;
725 int r;
726
727 assert(path);
728 assert(name);
729
bd1791b5 730 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
bf25f165
LP
731 if (r < 0)
732 return r;
733
7c248223 734 return RET_NERRNO(removexattr(fs, name));
bf25f165
LP
735}
736
ae7ef63f 737int cg_pid_get_path(const char *controller, pid_t pid, char **ret_path) {
7027ff61 738 _cleanup_fclose_ FILE *f = NULL;
7756528e 739 const char *fs, *controller_str = NULL; /* avoid false maybe-uninitialized warning */
d2b39cb6 740 int unified, r;
8c6db833 741
c6c18be3 742 assert(pid >= 0);
ae7ef63f 743 assert(ret_path);
8c6db833 744
5da38d07
TH
745 if (controller) {
746 if (!cg_controller_is_valid(controller))
747 return -EINVAL;
748 } else
749 controller = SYSTEMD_CGROUP_CONTROLLER;
750
c22800e4 751 unified = cg_unified_controller(controller);
b4cccbc1
LP
752 if (unified < 0)
753 return unified;
754 if (unified == 0) {
b6629c4b
TH
755 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
756 controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
757 else
758 controller_str = controller;
b6629c4b 759 }
7027ff61 760
b68fa010 761 fs = procfs_file_alloca(pid, "cgroup");
fdeea3f4
ZJS
762 r = fopen_unlocked(fs, "re", &f);
763 if (r == -ENOENT)
764 return -ESRCH;
765 if (r < 0)
766 return r;
35bbbf85 767
d2b39cb6
LP
768 for (;;) {
769 _cleanup_free_ char *line = NULL;
ae7ef63f 770 char *e;
c6c18be3 771
d2b39cb6
LP
772 r = read_line(f, LONG_LINE_MAX, &line);
773 if (r < 0)
774 return r;
775 if (r == 0)
ae7ef63f 776 return -ENODATA;
c6c18be3 777
efdb0237
LP
778 if (unified) {
779 e = startswith(line, "0:");
780 if (!e)
781 continue;
c6c18be3 782
efdb0237
LP
783 e = strchr(e, ':');
784 if (!e)
785 continue;
786 } else {
787 char *l;
efdb0237
LP
788
789 l = strchr(line, ':');
790 if (!l)
791 continue;
8af8afd6 792
efdb0237
LP
793 l++;
794 e = strchr(l, ':');
795 if (!e)
796 continue;
efdb0237 797 *e = 0;
ae7ef63f 798
bc20c31b 799 assert(controller_str);
ae7ef63f
ZJS
800 r = string_contains_word(l, ",", controller_str);
801 if (r < 0)
802 return r;
803 if (r == 0)
efdb0237 804 continue;
8af8afd6
LP
805 }
806
ae7ef63f
ZJS
807 char *path = strdup(e + 1);
808 if (!path)
7027ff61 809 return -ENOMEM;
c6c18be3 810
5e20b0a4 811 /* Truncate suffix indicating the process is a zombie */
ae7ef63f 812 e = endswith(path, " (deleted)");
5e20b0a4
LP
813 if (e)
814 *e = 0;
815
ae7ef63f 816 *ret_path = path;
7027ff61 817 return 0;
c6c18be3 818 }
8c6db833
LP
819}
820
f2a2e60b 821int cg_pidref_get_path(const char *controller, const PidRef *pidref, char **ret_path) {
a9062242
LP
822 _cleanup_free_ char *path = NULL;
823 int r;
824
825 assert(ret_path);
826
827 if (!pidref_is_set(pidref))
828 return -ESRCH;
829
830 r = cg_pid_get_path(controller, pidref->pid, &path);
831 if (r < 0)
832 return r;
833
834 /* Before we return the path, make sure the procfs entry for this pid still matches the pidref */
835 r = pidref_verify(pidref);
836 if (r < 0)
837 return r;
838
839 *ret_path = TAKE_PTR(path);
840 return 0;
841}
842
8c6db833 843int cg_install_release_agent(const char *controller, const char *agent) {
7027ff61 844 _cleanup_free_ char *fs = NULL, *contents = NULL;
efdb0237 845 const char *sc;
415fc41c 846 int r;
8c6db833 847
8c6db833
LP
848 assert(agent);
849
c22800e4 850 r = cg_unified_controller(controller);
b4cccbc1
LP
851 if (r < 0)
852 return r;
853 if (r > 0) /* doesn't apply to unified hierarchy */
efdb0237
LP
854 return -EOPNOTSUPP;
855
7027ff61
LP
856 r = cg_get_path(controller, NULL, "release_agent", &fs);
857 if (r < 0)
c6c18be3 858 return r;
8c6db833 859
7027ff61
LP
860 r = read_one_line_file(fs, &contents);
861 if (r < 0)
862 return r;
8c6db833
LP
863
864 sc = strstrip(contents);
e155a0aa 865 if (isempty(sc)) {
604028de 866 r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
574d5f2d 867 if (r < 0)
7027ff61 868 return r;
b8725df8 869 } else if (!path_equal(sc, agent))
7027ff61 870 return -EEXIST;
8c6db833 871
0da16248 872 fs = mfree(fs);
7027ff61
LP
873 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
874 if (r < 0)
875 return r;
8c6db833 876
0da16248 877 contents = mfree(contents);
7027ff61
LP
878 r = read_one_line_file(fs, &contents);
879 if (r < 0)
880 return r;
8c6db833
LP
881
882 sc = strstrip(contents);
8c6db833 883 if (streq(sc, "0")) {
604028de 884 r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
7027ff61
LP
885 if (r < 0)
886 return r;
c6c18be3 887
7027ff61
LP
888 return 1;
889 }
8c6db833 890
7027ff61
LP
891 if (!streq(sc, "1"))
892 return -EIO;
8c6db833 893
7027ff61 894 return 0;
8c6db833
LP
895}
896
ad929bcc
KS
897int cg_uninstall_release_agent(const char *controller) {
898 _cleanup_free_ char *fs = NULL;
415fc41c 899 int r;
efdb0237 900
c22800e4 901 r = cg_unified_controller(controller);
b4cccbc1
LP
902 if (r < 0)
903 return r;
904 if (r > 0) /* Doesn't apply to unified hierarchy */
efdb0237 905 return -EOPNOTSUPP;
ad929bcc 906
ac9ef333
LP
907 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
908 if (r < 0)
909 return r;
910
604028de 911 r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
ac9ef333
LP
912 if (r < 0)
913 return r;
914
0da16248 915 fs = mfree(fs);
ac9ef333 916
ad929bcc
KS
917 r = cg_get_path(controller, NULL, "release_agent", &fs);
918 if (r < 0)
919 return r;
920
604028de 921 r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER);
ad929bcc
KS
922 if (r < 0)
923 return r;
924
ac9ef333 925 return 0;
ad929bcc
KS
926}
927
6f883237 928int cg_is_empty(const char *controller, const char *path) {
7027ff61 929 _cleanup_fclose_ FILE *f = NULL;
efdb0237 930 pid_t pid;
7027ff61 931 int r;
8c6db833 932
8c6db833
LP
933 assert(path);
934
b043cd0b 935 r = cg_enumerate_processes(controller, path, &f);
6f883237 936 if (r == -ENOENT)
1bcf3fc6 937 return true;
c3175a7f 938 if (r < 0)
6f883237 939 return r;
8c6db833 940
6f883237 941 r = cg_read_pid(f, &pid);
c6c18be3
LP
942 if (r < 0)
943 return r;
8c6db833 944
6f883237 945 return r == 0;
8c6db833
LP
946}
947
6f883237 948int cg_is_empty_recursive(const char *controller, const char *path) {
415fc41c 949 int r;
8c6db833 950
8c6db833
LP
951 assert(path);
952
6fd66507 953 /* The root cgroup is always populated */
57ea45e1 954 if (controller && empty_or_root(path))
efdb0237 955 return false;
6fd66507 956
c22800e4 957 r = cg_unified_controller(controller);
b4cccbc1
LP
958 if (r < 0)
959 return r;
960 if (r > 0) {
ab2c3861 961 _cleanup_free_ char *t = NULL;
8c6db833 962
efdb0237 963 /* On the unified hierarchy we can check empty state
ab2c3861 964 * via the "populated" attribute of "cgroup.events". */
8c6db833 965
ab2c3861 966 r = cg_read_event(controller, path, "populated", &t);
1bcf3fc6
ZJS
967 if (r == -ENOENT)
968 return true;
efdb0237
LP
969 if (r < 0)
970 return r;
971
972 return streq(t, "0");
973 } else {
974 _cleanup_closedir_ DIR *d = NULL;
975 char *fn;
8c6db833 976
efdb0237 977 r = cg_is_empty(controller, path);
35d2e7ec 978 if (r <= 0)
7027ff61 979 return r;
35d2e7ec 980
efdb0237
LP
981 r = cg_enumerate_subgroups(controller, path, &d);
982 if (r == -ENOENT)
1bcf3fc6 983 return true;
efdb0237
LP
984 if (r < 0)
985 return r;
35d2e7ec 986
efdb0237
LP
987 while ((r = cg_read_subgroup(d, &fn)) > 0) {
988 _cleanup_free_ char *p = NULL;
989
657ee2d8 990 p = path_join(path, fn);
efdb0237
LP
991 free(fn);
992 if (!p)
993 return -ENOMEM;
994
995 r = cg_is_empty_recursive(controller, p);
996 if (r <= 0)
997 return r;
998 }
999 if (r < 0)
1000 return r;
1001
1002 return true;
1003 }
35d2e7ec
LP
1004}
1005
2a8020fe
ZJS
1006int cg_split_spec(const char *spec, char **ret_controller, char **ret_path) {
1007 _cleanup_free_ char *controller = NULL, *path = NULL;
660087dc 1008 int r;
35d2e7ec
LP
1009
1010 assert(spec);
35d2e7ec
LP
1011
1012 if (*spec == '/') {
99be45a4 1013 if (!path_is_normalized(spec))
e884315e 1014 return -EINVAL;
35d2e7ec 1015
2a8020fe 1016 if (ret_path) {
660087dc
ZJS
1017 r = path_simplify_alloc(spec, &path);
1018 if (r < 0)
1019 return r;
8c6db833
LP
1020 }
1021
2a8020fe
ZJS
1022 } else {
1023 const char *e;
35d2e7ec 1024
2a8020fe
ZJS
1025 e = strchr(spec, ':');
1026 if (e) {
1027 controller = strndup(spec, e-spec);
1028 if (!controller)
35d2e7ec 1029 return -ENOMEM;
2a8020fe
ZJS
1030 if (!cg_controller_is_valid(controller))
1031 return -EINVAL;
35d2e7ec 1032
2a8020fe
ZJS
1033 if (!isempty(e + 1)) {
1034 path = strdup(e+1);
1035 if (!path)
1036 return -ENOMEM;
35d2e7ec 1037
2a8020fe
ZJS
1038 if (!path_is_normalized(path) ||
1039 !path_is_absolute(path))
1040 return -EINVAL;
8c6db833 1041
4ff361cc 1042 path_simplify(path);
2a8020fe 1043 }
246aa6dd 1044
2a8020fe
ZJS
1045 } else {
1046 if (!cg_controller_is_valid(spec))
1047 return -EINVAL;
35d2e7ec 1048
2a8020fe
ZJS
1049 if (ret_controller) {
1050 controller = strdup(spec);
1051 if (!controller)
1052 return -ENOMEM;
1053 }
baa89da4 1054 }
baa89da4 1055 }
5954c074 1056
2a8020fe
ZJS
1057 if (ret_controller)
1058 *ret_controller = TAKE_PTR(controller);
1059 if (ret_path)
1060 *ret_path = TAKE_PTR(path);
35d2e7ec 1061 return 0;
8c6db833 1062}
c6c18be3 1063
820fe745 1064int cg_mangle_path(const char *path, char **ret) {
78edb35a 1065 _cleanup_free_ char *c = NULL, *p = NULL;
35d2e7ec
LP
1066 int r;
1067
1068 assert(path);
820fe745 1069 assert(ret);
35d2e7ec 1070
73e231ab 1071 /* First, check if it already is a filesystem path */
660087dc 1072 if (path_startswith(path, "/sys/fs/cgroup"))
820fe745 1073 return path_simplify_alloc(path, ret);
35d2e7ec 1074
73e231ab 1075 /* Otherwise, treat it as cg spec */
b69d29ce
LP
1076 r = cg_split_spec(path, &c, &p);
1077 if (r < 0)
35d2e7ec
LP
1078 return r;
1079
820fe745 1080 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, ret);
35d2e7ec 1081}
1f73f0f1 1082
820fe745 1083int cg_get_root_path(char **ret_path) {
9444b1f2 1084 char *p, *e;
7027ff61
LP
1085 int r;
1086
820fe745 1087 assert(ret_path);
7027ff61 1088
9444b1f2 1089 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
7027ff61
LP
1090 if (r < 0)
1091 return r;
1092
efdb0237
LP
1093 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1094 if (!e)
1095 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1096 if (!e)
1097 e = endswith(p, "/system"); /* even more legacy */
9444b1f2 1098 if (e)
7027ff61
LP
1099 *e = 0;
1100
820fe745 1101 *ret_path = p;
1f73f0f1
LP
1102 return 0;
1103}
b59e2465 1104
820fe745 1105int cg_shift_path(const char *cgroup, const char *root, const char **ret_shifted) {
751bc6ac
LP
1106 _cleanup_free_ char *rt = NULL;
1107 char *p;
ba1261bc
LP
1108 int r;
1109
e9174f29 1110 assert(cgroup);
820fe745 1111 assert(ret_shifted);
e9174f29
LP
1112
1113 if (!root) {
1114 /* If the root was specified let's use that, otherwise
1115 * let's determine it from PID 1 */
1116
751bc6ac 1117 r = cg_get_root_path(&rt);
e9174f29
LP
1118 if (r < 0)
1119 return r;
1120
751bc6ac 1121 root = rt;
e9174f29 1122 }
ba1261bc 1123
751bc6ac 1124 p = path_startswith(cgroup, root);
efdb0237 1125 if (p && p > cgroup)
820fe745 1126 *ret_shifted = p - 1;
751bc6ac 1127 else
820fe745 1128 *ret_shifted = cgroup;
751bc6ac
LP
1129
1130 return 0;
1131}
1132
820fe745 1133int cg_pid_get_path_shifted(pid_t pid, const char *root, char **ret_cgroup) {
751bc6ac
LP
1134 _cleanup_free_ char *raw = NULL;
1135 const char *c;
1136 int r;
1137
1138 assert(pid >= 0);
820fe745 1139 assert(ret_cgroup);
751bc6ac
LP
1140
1141 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
7027ff61 1142 if (r < 0)
ba1261bc 1143 return r;
ba1261bc 1144
751bc6ac
LP
1145 r = cg_shift_path(raw, root, &c);
1146 if (r < 0)
1147 return r;
ba1261bc 1148
8922a728 1149 if (c == raw) {
820fe745 1150 *ret_cgroup = TAKE_PTR(raw);
8922a728 1151 return 0;
751bc6ac 1152 }
ba1261bc 1153
8922a728 1154 return strdup_to(ret_cgroup, c);
ba1261bc
LP
1155}
1156
820fe745 1157int cg_path_decode_unit(const char *cgroup, char **ret_unit) {
ef1673d1 1158 assert(cgroup);
820fe745 1159 assert(ret_unit);
ef1673d1 1160
8922a728 1161 size_t n = strcspn(cgroup, "/");
8b0849e9
LP
1162 if (n < 3)
1163 return -ENXIO;
1164
8922a728 1165 char *c = strndupa_safe(cgroup, n);
ae018d9b 1166 c = cg_unescape(c);
ef1673d1 1167
7410616c 1168 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
cfeaa44a 1169 return -ENXIO;
ef1673d1 1170
8922a728 1171 return strdup_to(ret_unit, c);
ef1673d1
MT
1172}
1173
8b0849e9
LP
1174static bool valid_slice_name(const char *p, size_t n) {
1175
1176 if (!p)
1177 return false;
1178
fbd0b64f 1179 if (n < STRLEN("x.slice"))
8b0849e9
LP
1180 return false;
1181
1182 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1183 char buf[n+1], *c;
1184
1185 memcpy(buf, p, n);
1186 buf[n] = 0;
1187
1188 c = cg_unescape(buf);
1189
7410616c 1190 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
8b0849e9
LP
1191 }
1192
1193 return false;
1194}
1195
/* Skips over all leading slice components of the cgroup path 'p'. Returns a pointer into 'p' to the first
 * component that is not a valid slice name, with any slashes preceding it already skipped. */
static const char *skip_slices(const char *p) {
        assert(p);

        p += strspn(p, "/");

        for (size_t n = strcspn(p, "/"); valid_slice_name(p, n); n = strcspn(p, "/")) {
                /* Step over the slice and the separators following it */
                p += n;
                p += strspn(p, "/");
        }

        return p;
}
1213
8b0849e9 1214int cg_path_get_unit(const char *path, char **ret) {
fe96c0f8 1215 _cleanup_free_ char *unit = NULL;
6c03089c 1216 const char *e;
8b0849e9 1217 int r;
6c03089c
LP
1218
1219 assert(path);
8b0849e9 1220 assert(ret);
6c03089c 1221
9444b1f2 1222 e = skip_slices(path);
6c03089c 1223
8b0849e9
LP
1224 r = cg_path_decode_unit(e, &unit);
1225 if (r < 0)
1226 return r;
1227
1228 /* We skipped over the slices, don't accept any now */
fe96c0f8 1229 if (endswith(unit, ".slice"))
8b0849e9 1230 return -ENXIO;
8b0849e9 1231
fe96c0f8 1232 *ret = TAKE_PTR(unit);
8b0849e9 1233 return 0;
6c03089c
LP
1234}
1235
ee164216
QD
1236int cg_path_get_unit_path(const char *path, char **ret) {
1237 _cleanup_free_ char *path_copy = NULL;
1238 char *unit_name;
1239
1240 assert(path);
1241 assert(ret);
1242
1243 path_copy = strdup(path);
1244 if (!path_copy)
1245 return -ENOMEM;
1246
1247 unit_name = (char *)skip_slices(path_copy);
1248 unit_name[strcspn(unit_name, "/")] = 0;
1249
1250 if (!unit_name_is_valid(cg_unescape(unit_name), UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1251 return -ENXIO;
1252
1253 *ret = TAKE_PTR(path_copy);
1254
1255 return 0;
1256}
1257
820fe745 1258int cg_pid_get_unit(pid_t pid, char **ret_unit) {
7fd1b19b 1259 _cleanup_free_ char *cgroup = NULL;
ba1261bc 1260 int r;
ba1261bc 1261
820fe745 1262 assert(ret_unit);
ef1673d1 1263
7027ff61 1264 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
ef1673d1
MT
1265 if (r < 0)
1266 return r;
1267
820fe745 1268 return cg_path_get_unit(cgroup, ret_unit);
6c03089c 1269}
ef1673d1 1270
2c302e89
MY
1271int cg_pidref_get_unit(const PidRef *pidref, char **ret) {
1272 _cleanup_free_ char *unit = NULL;
1273 int r;
1274
1275 assert(ret);
1276
1277 if (!pidref_is_set(pidref))
1278 return -ESRCH;
1279
1280 r = cg_pid_get_unit(pidref->pid, &unit);
1281 if (r < 0)
1282 return r;
1283
1284 r = pidref_verify(pidref);
1285 if (r < 0)
1286 return r;
1287
1288 *ret = TAKE_PTR(unit);
1289 return 0;
1290}
1291
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer behind the session scope component (and any slashes following it), or NULL if 'p' is
 * empty or its first component is not a session scope carrying a valid session ID.
 */
static const char *skip_session(const char *p) {
        size_t n, id_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) != 0 || memcmp(p + n - 6, ".scope", 6) != 0)
                return NULL;

        /* Copy out what is between the "session-" prefix and the ".scope" suffix */
        id_len = n - 8 - 6;
        char id[id_len + 1];

        memcpy(id, p + 8, id_len);
        id[id_len] = 0;

        /* Note that session scopes never need unescaping, since they cannot conflict with the kernel's
         * own names, hence we don't need to call cg_unescape() here. */

        if (!session_id_valid(id))
                return NULL;

        p += n;
        p += strspn(p, "/");
        return p;
}
1328
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer behind the user manager service component (and any slashes following it), or NULL if
 * 'p' is empty or its first component is not of the form user@<uid>.service with a parsable UID.
 */
static const char *skip_user_manager(const char *p) {
        size_t n, uid_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + n - 8, ".service", 8) != 0)
                return NULL;

        /* Copy out what is between the "user@" prefix and the ".service" suffix */
        uid_len = n - 5 - 8;
        char uid[uid_len + 1];

        memcpy(uid, p + 5, uid_len);
        uid[uid_len] = 0;

        /* Note that user manager services never need unescaping, since they cannot conflict with the
         * kernel's own names, hence we don't need to call cg_unescape() here. */

        if (parse_uid(uid, NULL) < 0)
                return NULL;

        p += n;
        p += strspn(p, "/");

        return p;
}
1366
/* Skips the part of a cgroup path that leads to a user's own subtree: first any slices, then either the
 * user manager service or, failing that, a session scope. Returns NULL if neither of the two follows the
 * slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Prefer the user manager, if it's in the path now... */
        after_manager = skip_user_manager(after_slices);
        if (after_manager)
                return after_manager;

        /* ...and otherwise fall back to the user session */
        return skip_session(after_slices);
}
32081481 1383
/* Extracts the user unit name from a cgroup path. Returns -ENXIO if the path does not begin with a user
 * prefix (see skip_user_prefix()); otherwise the result of cg_path_get_unit() on the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *inner;

        assert(path);
        assert(ret);

        inner = skip_user_prefix(path);
        if (!inner)
                return -ENXIO;

        /* Below the user prefix things look pretty much the same as for a system unit, hence reuse the
         * same parser. */
        return cg_path_get_unit(inner, ret);
}
ba1261bc 1398
820fe745 1399int cg_pid_get_user_unit(pid_t pid, char **ret_unit) {
7fd1b19b 1400 _cleanup_free_ char *cgroup = NULL;
6c03089c
LP
1401 int r;
1402
820fe745 1403 assert(ret_unit);
6c03089c 1404
7027ff61 1405 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
6c03089c
LP
1406 if (r < 0)
1407 return r;
1408
820fe745 1409 return cg_path_get_user_unit(cgroup, ret_unit);
ba1261bc 1410}
e884315e 1411
820fe745 1412int cg_path_get_machine_name(const char *path, char **ret_machine) {
efdb0237
LP
1413 _cleanup_free_ char *u = NULL;
1414 const char *sl;
89f7c846 1415 int r;
374ec6ab 1416
89f7c846
LP
1417 r = cg_path_get_unit(path, &u);
1418 if (r < 0)
1419 return r;
7027ff61 1420
efdb0237 1421 sl = strjoina("/run/systemd/machines/unit:", u);
820fe745 1422 return readlink_malloc(sl, ret_machine);
7027ff61
LP
1423}
1424
820fe745 1425int cg_pid_get_machine_name(pid_t pid, char **ret_machine) {
7fd1b19b 1426 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1427 int r;
1428
820fe745 1429 assert(ret_machine);
7027ff61
LP
1430
1431 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1432 if (r < 0)
1433 return r;
1434
820fe745 1435 return cg_path_get_machine_name(cgroup, ret_machine);
7027ff61
LP
1436}
1437
535e3dd0 1438int cg_path_get_cgroupid(const char *path, uint64_t *ret) {
a5edf95e 1439 cg_file_handle fh = CG_FILE_HANDLE_INIT;
3b287495 1440 int mnt_id;
535e3dd0
ILG
1441
1442 assert(path);
1443 assert(ret);
1444
535e3dd0
ILG
1445 /* This is cgroupfs so we know the size of the handle, thus no need to loop around like
1446 * name_to_handle_at_loop() does in mountpoint-util.c */
a5edf95e 1447 if (name_to_handle_at(AT_FDCWD, path, &fh.file_handle, &mnt_id, 0) < 0)
535e3dd0
ILG
1448 return -errno;
1449
a5edf95e 1450 *ret = CG_FILE_HANDLE_CGROUPID(fh);
535e3dd0
ILG
1451 return 0;
1452}
1453
3b287495
LP
1454int cg_fd_get_cgroupid(int fd, uint64_t *ret) {
1455 cg_file_handle fh = CG_FILE_HANDLE_INIT;
1456 int mnt_id = -1;
1457
1458 assert(fd >= 0);
1459 assert(ret);
1460
1461 if (name_to_handle_at(fd, "", &fh.file_handle, &mnt_id, AT_EMPTY_PATH) < 0)
1462 return -errno;
1463
1464 *ret = CG_FILE_HANDLE_CGROUPID(fh);
1465 return 0;
1466}
1467
820fe745 1468int cg_path_get_session(const char *path, char **ret_session) {
8b0849e9
LP
1469 _cleanup_free_ char *unit = NULL;
1470 char *start, *end;
1471 int r;
7027ff61
LP
1472
1473 assert(path);
7027ff61 1474
8b0849e9
LP
1475 r = cg_path_get_unit(path, &unit);
1476 if (r < 0)
1477 return r;
7027ff61 1478
8b0849e9
LP
1479 start = startswith(unit, "session-");
1480 if (!start)
cfeaa44a 1481 return -ENXIO;
8b0849e9
LP
1482 end = endswith(start, ".scope");
1483 if (!end)
cfeaa44a 1484 return -ENXIO;
8b0849e9
LP
1485
1486 *end = 0;
1487 if (!session_id_valid(start))
cfeaa44a 1488 return -ENXIO;
374ec6ab 1489
8922a728
ZJS
1490 if (!ret_session)
1491 return 0;
7027ff61 1492
8922a728 1493 return strdup_to(ret_session, start);
7027ff61
LP
1494}
1495
820fe745 1496int cg_pid_get_session(pid_t pid, char **ret_session) {
7fd1b19b 1497 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1498 int r;
1499
7027ff61
LP
1500 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1501 if (r < 0)
1502 return r;
1503
820fe745 1504 return cg_path_get_session(cgroup, ret_session);
7027ff61
LP
1505}
1506
820fe745 1507int cg_path_get_owner_uid(const char *path, uid_t *ret_uid) {
374ec6ab 1508 _cleanup_free_ char *slice = NULL;
8b0849e9 1509 char *start, *end;
374ec6ab 1510 int r;
ae018d9b
LP
1511
1512 assert(path);
ae018d9b 1513
374ec6ab
LP
1514 r = cg_path_get_slice(path, &slice);
1515 if (r < 0)
1516 return r;
ae018d9b 1517
674eb685
LP
1518 start = startswith(slice, "user-");
1519 if (!start)
cfeaa44a 1520 return -ENXIO;
820fe745 1521
8b0849e9 1522 end = endswith(start, ".slice");
674eb685 1523 if (!end)
cfeaa44a 1524 return -ENXIO;
ae018d9b 1525
8b0849e9 1526 *end = 0;
820fe745 1527 if (parse_uid(start, ret_uid) < 0)
cfeaa44a 1528 return -ENXIO;
674eb685 1529
674eb685 1530 return 0;
ae018d9b
LP
1531}
1532
820fe745 1533int cg_pid_get_owner_uid(pid_t pid, uid_t *ret_uid) {
ae018d9b
LP
1534 _cleanup_free_ char *cgroup = NULL;
1535 int r;
1536
ae018d9b
LP
1537 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1538 if (r < 0)
1539 return r;
1540
820fe745 1541 return cg_path_get_owner_uid(cgroup, ret_uid);
ae018d9b
LP
1542}
1543
820fe745 1544int cg_path_get_slice(const char *p, char **ret_slice) {
1021b21b 1545 const char *e = NULL;
1021b21b
LP
1546
1547 assert(p);
820fe745 1548 assert(ret_slice);
1021b21b 1549
66d82602
MY
1550 /* Finds the right-most slice unit from the beginning, but stops before we come to
1551 * the first non-slice unit. */
329ac4bc 1552
1021b21b 1553 for (;;) {
66d82602
MY
1554 const char *s;
1555 int n;
1021b21b 1556
66d82602
MY
1557 n = path_find_first_component(&p, /* accept_dot_dot = */ false, &s);
1558 if (n < 0)
1559 return n;
1560 if (!valid_slice_name(s, n))
8922a728 1561 break;
1021b21b 1562
66d82602 1563 e = s;
1021b21b 1564 }
8922a728
ZJS
1565
1566 if (e)
1567 return cg_path_decode_unit(e, ret_slice);
66d82602 1568
8922a728 1569 return strdup_to(ret_slice, SPECIAL_ROOT_SLICE);
1021b21b
LP
1570}
1571
820fe745 1572int cg_pid_get_slice(pid_t pid, char **ret_slice) {
1021b21b
LP
1573 _cleanup_free_ char *cgroup = NULL;
1574 int r;
1575
820fe745 1576 assert(ret_slice);
1021b21b
LP
1577
1578 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1579 if (r < 0)
1580 return r;
1581
820fe745 1582 return cg_path_get_slice(cgroup, ret_slice);
1021b21b
LP
1583}
1584
/* Determines the user slice of a cgroup path. Returns -ENXIO if the path does not begin with a user prefix
 * (see skip_user_prefix()); otherwise the result of cg_path_get_slice() on the remainder. */
int cg_path_get_user_slice(const char *p, char **ret_slice) {
        const char *inner;

        assert(p);
        assert(ret_slice);

        inner = skip_user_prefix(p);
        if (!inner)
                return -ENXIO;

        /* Below the user prefix things look pretty much the same as for a system slice, hence reuse the
         * same parser. */
        return cg_path_get_slice(inner, ret_slice);
}
1598
820fe745 1599int cg_pid_get_user_slice(pid_t pid, char **ret_slice) {
329ac4bc
LP
1600 _cleanup_free_ char *cgroup = NULL;
1601 int r;
1602
820fe745 1603 assert(ret_slice);
329ac4bc
LP
1604
1605 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1606 if (r < 0)
1607 return r;
1608
820fe745 1609 return cg_path_get_user_slice(cgroup, ret_slice);
329ac4bc
LP
1610}
1611
1a56b0c0 1612bool cg_needs_escape(const char *p) {
ae018d9b 1613
1a56b0c0
LP
1614 /* Checks if the specified path is a valid cgroup name by our rules, or if it must be escaped. Note
1615 * that we consider escaped cgroup names invalid here, as they need to be escaped a second time if
1616 * they shall be used. Also note that various names cannot be made valid by escaping even if we
1617 * return true here (because too long, or contain the forbidden character "/"). */
ae018d9b 1618
1a56b0c0
LP
1619 if (!filename_is_valid(p))
1620 return true;
efdb0237 1621
1a56b0c0
LP
1622 if (IN_SET(p[0], '_', '.'))
1623 return true;
ae018d9b 1624
1a56b0c0
LP
1625 if (STR_IN_SET(p, "notify_on_release", "release_agent", "tasks"))
1626 return true;
ae018d9b 1627
1a56b0c0
LP
1628 if (startswith(p, "cgroup."))
1629 return true;
efdb0237 1630
1a56b0c0
LP
1631 for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1632 const char *q;
1633
1634 q = startswith(p, cgroup_controller_to_string(c));
1635 if (!q)
1636 continue;
1637
1638 if (q[0] == '.')
1639 return true;
ae018d9b
LP
1640 }
1641
1a56b0c0
LP
1642 return false;
1643}
efdb0237 1644
1a56b0c0
LP
1645int cg_escape(const char *p, char **ret) {
1646 _cleanup_free_ char *n = NULL;
1647
1648 /* This implements very minimal escaping for names to be used as file names in the cgroup tree: any
1649 * name which might conflict with a kernel name or is prefixed with '_' is prefixed with a '_'. That
1650 * way, when reading cgroup names it is sufficient to remove a single prefixing underscore if there
1651 * is one. */
1652
1653 /* The return value of this function (unlike cg_unescape()) needs free()! */
1654
1655 if (cg_needs_escape(p)) {
1656 n = strjoin("_", p);
1657 if (!n)
1658 return -ENOMEM;
1659
1660 if (!filename_is_valid(n)) /* became invalid due to the prefixing? Or contained things like a slash that cannot be fixed by prefixing? */
1661 return -EINVAL;
1662 } else {
1663 n = strdup(p);
1664 if (!n)
1665 return -ENOMEM;
1666 }
1667
1668 *ret = TAKE_PTR(n);
1669 return 0;
ae018d9b
LP
1670}
1671
/* Undoes cg_escape(): if the name begins with an underscore, returns a pointer to the character right
 * behind it, otherwise returns the name itself.
 *
 * The return value of this function (unlike cg_escape()) doesn't need free(), it always points into the
 * string passed in! */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
78edb35a
LP
1683
1684#define CONTROLLER_VALID \
4b549144 1685 DIGITS LETTERS \
78edb35a
LP
1686 "_"
1687
185a0874 1688bool cg_controller_is_valid(const char *p) {
78edb35a
LP
1689 const char *t, *s;
1690
1691 if (!p)
1692 return false;
1693
b6629c4b
TH
1694 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1695 return true;
1696
185a0874
DJL
1697 s = startswith(p, "name=");
1698 if (s)
1699 p = s;
78edb35a 1700
4c701096 1701 if (IN_SET(*p, 0, '_'))
78edb35a
LP
1702 return false;
1703
1704 for (t = p; *t; t++)
1705 if (!strchr(CONTROLLER_VALID, *t))
1706 return false;
1707
8ca94009 1708 if (t - p > NAME_MAX)
78edb35a
LP
1709 return false;
1710
1711 return true;
1712}
a016b922
LP
1713
1714int cg_slice_to_path(const char *unit, char **ret) {
1715 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1716 const char *dash;
7410616c 1717 int r;
a016b922
LP
1718
1719 assert(unit);
1720 assert(ret);
1721
8922a728
ZJS
1722 if (streq(unit, SPECIAL_ROOT_SLICE))
1723 return strdup_to(ret, "");
c96cc582 1724
7410616c 1725 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
a016b922
LP
1726 return -EINVAL;
1727
1728 if (!endswith(unit, ".slice"))
1729 return -EINVAL;
1730
7410616c
LP
1731 r = unit_name_to_prefix(unit, &p);
1732 if (r < 0)
1733 return r;
a016b922
LP
1734
1735 dash = strchr(p, '-');
e66e5b61
LP
1736
1737 /* Don't allow initial dashes */
1738 if (dash == p)
1739 return -EINVAL;
1740
a016b922
LP
1741 while (dash) {
1742 _cleanup_free_ char *escaped = NULL;
1743 char n[dash - p + sizeof(".slice")];
1744
989290db 1745#if HAS_FEATURE_MEMORY_SANITIZER
1c56d501 1746 /* msan doesn't instrument stpncpy, so it thinks
5238e957 1747 * n is later used uninitialized:
1c56d501
ZJS
1748 * https://github.com/google/sanitizers/issues/926
1749 */
1750 zero(n);
1751#endif
1752
e66e5b61 1753 /* Don't allow trailing or double dashes */
4c701096 1754 if (IN_SET(dash[1], 0, '-'))
c96cc582 1755 return -EINVAL;
a016b922 1756
c96cc582 1757 strcpy(stpncpy(n, p, dash - p), ".slice");
7410616c 1758 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
a016b922
LP
1759 return -EINVAL;
1760
1a56b0c0
LP
1761 r = cg_escape(n, &escaped);
1762 if (r < 0)
1763 return r;
a016b922 1764
c2bc710b 1765 if (!strextend(&s, escaped, "/"))
a016b922
LP
1766 return -ENOMEM;
1767
1768 dash = strchr(dash+1, '-');
1769 }
1770
1a56b0c0
LP
1771 r = cg_escape(unit, &e);
1772 if (r < 0)
1773 return r;
a016b922 1774
c2bc710b 1775 if (!strextend(&s, e))
a016b922
LP
1776 return -ENOMEM;
1777
ae2a15bc 1778 *ret = TAKE_PTR(s);
a016b922
LP
1779 return 0;
1780}
4ad49000 1781
bd1791b5 1782int cg_is_threaded(const char *path) {
084e7706
YW
1783 _cleanup_free_ char *fs = NULL, *contents = NULL;
1784 _cleanup_strv_free_ char **v = NULL;
1785 int r;
1786
bd1791b5 1787 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.type", &fs);
084e7706
YW
1788 if (r < 0)
1789 return r;
1790
1791 r = read_full_virtual_file(fs, &contents, NULL);
1792 if (r == -ENOENT)
1793 return false; /* Assume no. */
1794 if (r < 0)
1795 return r;
1796
1797 v = strv_split(contents, NULL);
1798 if (!v)
1799 return -ENOMEM;
1800
1801 /* If the cgroup is in the threaded mode, it contains "threaded".
1802 * If one of the parents or siblings is in the threaded mode, it may contain "invalid". */
1803 return strv_contains(v, "threaded") || strv_contains(v, "invalid");
1804}
1805
4ad49000
LP
1806int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1807 _cleanup_free_ char *p = NULL;
1808 int r;
1809
1810 r = cg_get_path(controller, path, attribute, &p);
1811 if (r < 0)
1812 return r;
1813
604028de 1814 return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
4ad49000
LP
1815}
1816
934277fe
LP
1817int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1818 _cleanup_free_ char *p = NULL;
1819 int r;
1820
1821 r = cg_get_path(controller, path, attribute, &p);
1822 if (r < 0)
1823 return r;
1824
1825 return read_one_line_file(p, ret);
1826}
1827
613328c3
AZ
1828int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret) {
1829 _cleanup_free_ char *value = NULL;
1830 uint64_t v;
1831 int r;
1832
1833 assert(ret);
1834
1835 r = cg_get_attribute(controller, path, attribute, &value);
1836 if (r == -ENOENT)
1837 return -ENODATA;
1838 if (r < 0)
1839 return r;
1840
1841 if (streq(value, "max")) {
1842 *ret = CGROUP_LIMIT_MAX;
1843 return 0;
1844 }
1845
1846 r = safe_atou64(value, &v);
1847 if (r < 0)
1848 return r;
1849
1850 *ret = v;
1851 return 0;
1852}
1853
b41dcc51
AZ
1854int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret) {
1855 _cleanup_free_ char *value = NULL;
1856 int r;
1857
1858 assert(ret);
1859
1860 r = cg_get_attribute(controller, path, attribute, &value);
1861 if (r == -ENOENT)
1862 return -ENODATA;
1863 if (r < 0)
1864 return r;
1865
1866 r = parse_boolean(value);
1867 if (r < 0)
1868 return r;
1869
1870 *ret = r;
1871 return 0;
1872}
1873
bd1791b5 1874int cg_get_owner(const char *path, uid_t *ret_uid) {
59331b8e
AZ
1875 _cleanup_free_ char *f = NULL;
1876 struct stat stats;
1877 int r;
1878
1879 assert(ret_uid);
1880
bd1791b5 1881 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &f);
59331b8e
AZ
1882 if (r < 0)
1883 return r;
1884
b30da1c6 1885 if (stat(f, &stats) < 0)
59331b8e
AZ
1886 return -errno;
1887
b30da1c6
LP
1888 r = stat_verify_directory(&stats);
1889 if (r < 0)
1890 return r;
1891
59331b8e
AZ
1892 *ret_uid = stats.st_uid;
1893 return 0;
1894}
1895
25a1f04c 1896int cg_get_keyed_attribute_full(
b734a4ff
LP
1897 const char *controller,
1898 const char *path,
1899 const char *attribute,
1900 char **keys,
25a1f04c
MS
1901 char **ret_values,
1902 CGroupKeyMode mode) {
66ebf6c0 1903
b734a4ff 1904 _cleanup_free_ char *filename = NULL, *contents = NULL;
b734a4ff 1905 const char *p;
9177fa9f 1906 size_t n, i, n_done = 0;
b734a4ff
LP
1907 char **v;
1908 int r;
1909
4e1dfa45 1910 /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
b734a4ff
LP
1911 * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
1912 * entries as 'keys'. On success each entry will be set to the value of the matching key.
1913 *
d9e45bc3
MS
1914 * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. If mode
1915 * is set to GG_KEY_MODE_GRACEFUL we ignore missing keys and return those that were parsed successfully. */
66ebf6c0
TH
1916
1917 r = cg_get_path(controller, path, attribute, &filename);
1918 if (r < 0)
1919 return r;
1920
b734a4ff 1921 r = read_full_file(filename, &contents, NULL);
66ebf6c0
TH
1922 if (r < 0)
1923 return r;
1924
b734a4ff
LP
1925 n = strv_length(keys);
1926 if (n == 0) /* No keys to retrieve? That's easy, we are done then */
1927 return 0;
66ebf6c0 1928
b734a4ff
LP
1929 /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
1930 v = newa0(char*, n);
66ebf6c0 1931
b734a4ff
LP
1932 for (p = contents; *p;) {
1933 const char *w = NULL;
b734a4ff 1934
9177fa9f
ZJS
1935 for (i = 0; i < n; i++)
1936 if (!v[i]) {
b734a4ff
LP
1937 w = first_word(p, keys[i]);
1938 if (w)
1939 break;
66ebf6c0 1940 }
66ebf6c0 1941
b734a4ff 1942 if (w) {
b734a4ff
LP
1943 size_t l;
1944
1945 l = strcspn(w, NEWLINE);
9177fa9f
ZJS
1946 v[i] = strndup(w, l);
1947 if (!v[i]) {
b734a4ff
LP
1948 r = -ENOMEM;
1949 goto fail;
66ebf6c0 1950 }
b734a4ff 1951
b734a4ff 1952 n_done++;
b734a4ff
LP
1953 if (n_done >= n)
1954 goto done;
1955
1956 p = w + l;
9177fa9f 1957 } else
b734a4ff 1958 p += strcspn(p, NEWLINE);
b734a4ff
LP
1959
1960 p += strspn(p, NEWLINE);
66ebf6c0
TH
1961 }
1962
25a1f04c
MS
1963 if (mode & CG_KEY_MODE_GRACEFUL)
1964 goto done;
d9e45bc3
MS
1965
1966 r = -ENXIO;
b734a4ff
LP
1967
1968fail:
24ae45cb 1969 free_many_charp(v, n);
b734a4ff
LP
1970 return r;
1971
1972done:
1973 memcpy(ret_values, v, sizeof(char*) * n);
25a1f04c
MS
1974 if (mode & CG_KEY_MODE_GRACEFUL)
1975 return n_done;
1976
66ebf6c0 1977 return 0;
4ad49000
LP
1978}
1979
aae7e17f 1980int cg_mask_to_string(CGroupMask mask, char **ret) {
ec635a2d 1981 _cleanup_free_ char *s = NULL;
ec635a2d 1982 bool space = false;
aae7e17f 1983 CGroupController c;
319a4f4b 1984 size_t n = 0;
aae7e17f
FB
1985
1986 assert(ret);
1987
1988 if (mask == 0) {
1989 *ret = NULL;
1990 return 0;
1991 }
1992
1993 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
ec635a2d
LP
1994 const char *k;
1995 size_t l;
aae7e17f 1996
f99850a0 1997 if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
aae7e17f
FB
1998 continue;
1999
ec635a2d
LP
2000 k = cgroup_controller_to_string(c);
2001 l = strlen(k);
2002
319a4f4b 2003 if (!GREEDY_REALLOC(s, n + space + l + 1))
ec635a2d
LP
2004 return -ENOMEM;
2005
2006 if (space)
2007 s[n] = ' ';
2008 memcpy(s + n + space, k, l);
2009 n += space + l;
2010
2011 space = true;
aae7e17f
FB
2012 }
2013
ec635a2d 2014 assert(s);
aae7e17f 2015
ec635a2d 2016 s[n] = 0;
ae2a15bc 2017 *ret = TAKE_PTR(s);
ec635a2d 2018
aae7e17f
FB
2019 return 0;
2020}
2021
38a90d45
LP
2022int cg_mask_from_string(const char *value, CGroupMask *ret) {
2023 CGroupMask m = 0;
2024
2025 assert(ret);
aae7e17f
FB
2026 assert(value);
2027
2028 for (;;) {
2029 _cleanup_free_ char *n = NULL;
2030 CGroupController v;
2031 int r;
2032
2033 r = extract_first_word(&value, &n, NULL, 0);
2034 if (r < 0)
2035 return r;
2036 if (r == 0)
2037 break;
2038
2039 v = cgroup_controller_from_string(n);
2040 if (v < 0)
2041 continue;
2042
38a90d45 2043 m |= CGROUP_CONTROLLER_TO_MASK(v);
aae7e17f 2044 }
38a90d45
LP
2045
2046 *ret = m;
aae7e17f
FB
2047 return 0;
2048}
2049
0fa7b500 2050int cg_mask_supported_subtree(const char *root, CGroupMask *ret) {
38a90d45 2051 CGroupMask mask;
415fc41c 2052 int r;
efdb0237 2053
67558d15
LP
2054 /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense of and that
2055 * are actually accessible. Only covers real controllers, i.e. not the CGROUP_CONTROLLER_BPF_xyz
2056 * pseudo-controllers. */
4ad49000 2057
b4cccbc1
LP
2058 r = cg_all_unified();
2059 if (r < 0)
2060 return r;
2061 if (r > 0) {
0fa7b500 2062 _cleanup_free_ char *controllers = NULL, *path = NULL;
efdb0237 2063
d51c4fca
YW
2064 /* In the unified hierarchy we can read the supported and accessible controllers from
2065 * the top-level cgroup attribute */
efdb0237 2066
5f4c5fef
LP
2067 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2068 if (r < 0)
2069 return r;
2070
2071 r = read_one_line_file(path, &controllers);
efdb0237
LP
2072 if (r < 0)
2073 return r;
4ad49000 2074
aae7e17f
FB
2075 r = cg_mask_from_string(controllers, &mask);
2076 if (r < 0)
2077 return r;
efdb0237 2078
1fbbb526 2079 /* Mask controllers that are not supported in unified hierarchy. */
03afd780 2080 mask &= CGROUP_MASK_V2;
efdb0237
LP
2081
2082 } else {
2083 CGroupController c;
2084
0fa7b500 2085 /* In the legacy hierarchy, we check which hierarchies are accessible. */
efdb0237 2086
38a90d45 2087 mask = 0;
efdb0237 2088 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
03afd780 2089 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
efdb0237
LP
2090 const char *n;
2091
03afd780
LP
2092 if (!FLAGS_SET(CGROUP_MASK_V1, bit))
2093 continue;
2094
efdb0237 2095 n = cgroup_controller_to_string(c);
0fa7b500 2096 if (controller_is_v1_accessible(root, n) >= 0)
03afd780 2097 mask |= bit;
efdb0237 2098 }
4ad49000
LP
2099 }
2100
efdb0237
LP
2101 *ret = mask;
2102 return 0;
4ad49000 2103}
b12afc8c 2104
0fa7b500
MK
2105int cg_mask_supported(CGroupMask *ret) {
2106 _cleanup_free_ char *root = NULL;
2107 int r;
2108
2109 r = cg_get_root_path(&root);
2110 if (r < 0)
2111 return r;
2112
2113 return cg_mask_supported_subtree(root, ret);
2114}
2115
6925a0de 2116int cg_kernel_controllers(Set **ret) {
594c3835 2117 _cleanup_set_free_ Set *controllers = NULL;
b12afc8c 2118 _cleanup_fclose_ FILE *f = NULL;
b12afc8c
LP
2119 int r;
2120
6925a0de 2121 assert(ret);
b12afc8c 2122
f09e86bc
LS
2123 /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
2124 * and controllers that aren't currently accessible (because not mounted). This does not include "name="
2125 * pseudo-controllers. */
e155a0aa 2126
fdeea3f4
ZJS
2127 r = fopen_unlocked("/proc/cgroups", "re", &f);
2128 if (r == -ENOENT) {
2129 *ret = NULL;
2130 return 0;
b12afc8c 2131 }
fdeea3f4
ZJS
2132 if (r < 0)
2133 return r;
35bbbf85 2134
b12afc8c 2135 /* Ignore the header line */
f5fbe71d 2136 (void) read_line(f, SIZE_MAX, NULL);
b12afc8c
LP
2137
2138 for (;;) {
dccdbf9b 2139 _cleanup_free_ char *controller = NULL;
b12afc8c
LP
2140 int enabled = 0;
2141
b12afc8c
LP
2142 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2143
121b7054
MY
2144 if (ferror(f))
2145 return -errno;
2146
b12afc8c
LP
2147 if (feof(f))
2148 break;
2149
b12afc8c
LP
2150 return -EBADMSG;
2151 }
2152
dccdbf9b 2153 if (!enabled)
b12afc8c 2154 continue;
b12afc8c 2155
dccdbf9b 2156 if (!cg_controller_is_valid(controller))
b12afc8c 2157 return -EBADMSG;
b12afc8c 2158
594c3835 2159 r = set_ensure_consume(&controllers, &string_hash_ops_free, TAKE_PTR(controller));
b12afc8c
LP
2160 if (r < 0)
2161 return r;
2162 }
2163
1cc6c93a 2164 *ret = TAKE_PTR(controllers);
6925a0de 2165
b12afc8c
LP
2166 return 0;
2167}
efdb0237 2168
d4d99bc6
ZJS
2169/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on
2170 * /sys/fs/cgroup/systemd. This unfortunately broke other tools (such as docker) which expected the v1
2171 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts v2 on
2172 * /sys/fs/cgroup/unified and maintains "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility
2173 * with other tools.
f08e9287 2174 *
d4d99bc6
ZJS
2175 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep
2176 * cgroup v2 process management but disable the compat dual layout, we return true on
2177 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and false on cg_hybrid_unified().
f08e9287
TH
2178 */
2179static thread_local bool unified_systemd_v232;
2180
d4d99bc6
ZJS
2181int cg_unified_cached(bool flush) {
2182 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
efdb0237 2183
efdb0237
LP
2184 struct statfs fs;
2185
2186 /* Checks if we support the unified hierarchy. Returns an
2187 * error when the cgroup hierarchies aren't mounted yet or we
2188 * have any other trouble determining if the unified hierarchy
2189 * is supported. */
2190
d4d99bc6
ZJS
2191 if (flush)
2192 unified_cache = CGROUP_UNIFIED_UNKNOWN;
2193 else if (unified_cache >= CGROUP_UNIFIED_NONE)
2194 return unified_cache;
efdb0237
LP
2195
2196 if (statfs("/sys/fs/cgroup/", &fs) < 0)
c028bed1 2197 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
efdb0237 2198
9aa21133
ZJS
2199 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2200 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
5da38d07 2201 unified_cache = CGROUP_UNIFIED_ALL;
9aa21133 2202 } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2977724b 2203 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
f08e9287 2204 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
9aa21133 2205 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2977724b 2206 unified_cache = CGROUP_UNIFIED_SYSTEMD;
f08e9287 2207 unified_systemd_v232 = false;
f08e9287 2208 } else {
2156061f
MG
2209 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) {
2210 if (errno == ENOENT) {
2211 /* Some other software may have set up /sys/fs/cgroup in a configuration we do not recognize. */
2212 log_debug_errno(errno, "Unsupported cgroupsv1 setup detected: name=systemd hierarchy not found.");
2213 return -ENOMEDIUM;
2214 }
9aa21133 2215 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
2156061f 2216 }
5535d8f7
EV
2217
2218 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2219 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2220 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2221 unified_systemd_v232 = true;
2222 } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2223 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2224 unified_cache = CGROUP_UNIFIED_NONE;
2225 } else {
2226 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
9aa21133 2227 (unsigned long long) fs.f_type);
5535d8f7 2228 unified_cache = CGROUP_UNIFIED_NONE;
9aa21133 2229 }
2977724b 2230 }
0bc5f001
DS
2231 } else if (F_TYPE_EQUAL(fs.f_type, SYSFS_MAGIC)) {
2232 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
2233 "No filesystem is currently mounted on /sys/fs/cgroup.");
baaa35ad
ZJS
2234 } else
2235 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
2236 "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2237 (unsigned long long)fs.f_type);
efdb0237 2238
d4d99bc6 2239 return unified_cache;
5da38d07
TH
2240}
2241
c22800e4 2242int cg_unified_controller(const char *controller) {
b4cccbc1 2243 int r;
5da38d07 2244
d4d99bc6 2245 r = cg_unified_cached(false);
b4cccbc1
LP
2246 if (r < 0)
2247 return r;
5da38d07 2248
d4d99bc6 2249 if (r == CGROUP_UNIFIED_NONE)
fc9ae717
LP
2250 return false;
2251
d4d99bc6 2252 if (r >= CGROUP_UNIFIED_ALL)
fc9ae717
LP
2253 return true;
2254
2255 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
5da38d07
TH
2256}
2257
b4cccbc1 2258int cg_all_unified(void) {
4bb652ac
LP
2259 int r;
2260
d4d99bc6 2261 r = cg_unified_cached(false);
4bb652ac
LP
2262 if (r < 0)
2263 return r;
2264
d4d99bc6 2265 return r >= CGROUP_UNIFIED_ALL;
efdb0237
LP
2266}
2267
b4cccbc1
LP
2268int cg_hybrid_unified(void) {
2269 int r;
2977724b 2270
d4d99bc6 2271 r = cg_unified_cached(false);
b4cccbc1
LP
2272 if (r < 0)
2273 return r;
2977724b 2274
d4d99bc6 2275 return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
13c31542
TH
2276}
2277
b426b4ee
NR
/* Checks whether the cgroup at 'path' is marked as delegated via extended attributes.
 *
 * Returns the result of reading "trusted.delegate", or, if that attribute is absent, of reading
 * "user.delegate". If both are absent the result is false. Other errors are passed through. */
int cg_is_delegated(const char *path) {
        int r;

        assert(path);

        r = cg_get_xattr_bool(path, "trusted.delegate");
        if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
                /* The (preferred) trusted xattr isn't set, hence fall back to the untrusted one. This
                 * should be safe under the assumption that whoever is trusted enough to own the
                 * cgroup is also trusted enough to decide whether it is delegated or not. */
                r = cg_get_xattr_bool(path, "user.delegate");
                if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                        return false;
        }

        return r;
}
2293
/* Same check as cg_is_delegated(), but operating on an already opened file descriptor: reads
 * "trusted.delegate" first and falls back to "user.delegate" if the former is absent. Returns
 * false if neither attribute is present; other errors are passed through. */
int cg_is_delegated_fd(int fd) {
        int r;

        assert(fd >= 0);

        r = getxattr_at_bool(fd, /* path= */ NULL, "trusted.delegate", /* flags= */ 0);
        if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
                r = getxattr_at_bool(fd, /* path= */ NULL, "user.delegate", /* flags= */ 0);
                if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                        return false;
        }

        return r;
}
2306
6cf96ab4
NR
/* Reads the boolean "user.coredump_receive" xattr of the cgroup at 'path'. An absent attribute is
 * reported as false; any other result (including errors) is returned unmodified. */
int cg_has_coredump_receive(const char *path) {
        int r;

        assert(path);

        r = cg_get_xattr_bool(path, "user.coredump_receive");

        return ERRNO_IS_NEG_XATTR_ABSENT(r) ? false : r;
}
2318
9be57249
TH
2319const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2320 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2321 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
ac06a0cf
TH
2322 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2323 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
9be57249
TH
2324};
2325
2326static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2327 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2328 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
ac06a0cf
TH
2329 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2330 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
9be57249
TH
2331};
2332
2333DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2334
f0bef277
EV
2335bool is_cgroup_fs(const struct statfs *s) {
2336 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2337 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2338}
2339
/* Returns true if the file descriptor refers to an inode on a cgroup (v1) or cgroup2 file system.
 *
 * If fstatfs() fails, false is returned. The return type is bool, so a negative errno cannot be
 * passed to the caller: it would be converted to 'true' and a failed lookup would be misreported
 * as "is a cgroup file system". */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2348
b82f71c7 2349static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
efdb0237
LP
2350 [CGROUP_CONTROLLER_CPU] = "cpu",
2351 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
047f5d63 2352 [CGROUP_CONTROLLER_CPUSET] = "cpuset",
13c31542 2353 [CGROUP_CONTROLLER_IO] = "io",
efdb0237
LP
2354 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2355 [CGROUP_CONTROLLER_MEMORY] = "memory",
3905f127 2356 [CGROUP_CONTROLLER_DEVICES] = "devices",
03a7b521 2357 [CGROUP_CONTROLLER_PIDS] = "pids",
17f14955 2358 [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
084c7007 2359 [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
506ea51b 2360 [CGROUP_CONTROLLER_BPF_FOREIGN] = "bpf-foreign",
a8e5eb17 2361 [CGROUP_CONTROLLER_BPF_SOCKET_BIND] = "bpf-socket-bind",
6f50d4f7 2362 [CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES] = "bpf-restrict-network-interfaces",
efdb0237
LP
2363};
2364
2365DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
f98c2585
CD
2366
2367CGroupMask get_cpu_accounting_mask(void) {
2368 static CGroupMask needed_mask = (CGroupMask) -1;
2369
2370 /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
2371 * provided externally from the CPU controller, which means we don't
2372 * need to enable the CPU controller just to get metrics. This is good,
2373 * because enabling the CPU controller comes at a minor performance
2374 * hit, especially when it's propagated deep into large hierarchies.
2375 * There's also no separate CPU accounting controller available within
2376 * a unified hierarchy.
2377 *
2378 * This combination of factors results in the desired cgroup mask to
2379 * enable for CPU accounting varying as follows:
2380 *
2381 * ╔═════════════════════╤═════════════════════╗
2382 * ║ Linux ≥4.15 │ Linux <4.15 ║
2383 * ╔═══════════════╬═════════════════════╪═════════════════════╣
2384 * ║ Unified ║ nothing │ CGROUP_MASK_CPU ║
2385 * ╟───────────────╫─────────────────────┼─────────────────────╢
2386 * ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
2387 * ╚═══════════════╩═════════════════════╧═════════════════════╝
2388 *
2389 * We check kernel version here instead of manually checking whether
2390 * cpu.stat is present for every cgroup, as that check in itself would
2391 * already be fairly expensive.
2392 *
2393 * Kernels where this patch has been backported will therefore have the
2394 * CPU controller enabled unnecessarily. This is more expensive than
2395 * necessary, but harmless. ☺️
2396 */
2397
2398 if (needed_mask == (CGroupMask) -1) {
2399 if (cg_all_unified()) {
2400 struct utsname u;
2401 assert_se(uname(&u) >= 0);
2402
8087644a 2403 if (strverscmp_improved(u.release, "4.15") < 0)
f98c2585
CD
2404 needed_mask = CGROUP_MASK_CPU;
2405 else
2406 needed_mask = 0;
2407 } else
2408 needed_mask = CGROUP_MASK_CPUACCT;
2409 }
2410
2411 return needed_mask;
2412}
2413
2414bool cpu_accounting_is_cheap(void) {
2415 return get_cpu_accounting_mask() == 0;
2416}
4d824a4e
AZ
2417
2418static const char* const managed_oom_mode_table[_MANAGED_OOM_MODE_MAX] = {
2419 [MANAGED_OOM_AUTO] = "auto",
2420 [MANAGED_OOM_KILL] = "kill",
2421};
2422
2423DEFINE_STRING_TABLE_LOOKUP(managed_oom_mode, ManagedOOMMode);
242d75bd
AZ
2424
2425static const char* const managed_oom_preference_table[_MANAGED_OOM_PREFERENCE_MAX] = {
2426 [MANAGED_OOM_PREFERENCE_NONE] = "none",
2427 [MANAGED_OOM_PREFERENCE_AVOID] = "avoid",
2428 [MANAGED_OOM_PREFERENCE_OMIT] = "omit",
2429};
2430
2431DEFINE_STRING_TABLE_LOOKUP(managed_oom_preference, ManagedOOMPreference);