]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/cgroup-util.c
cgroup-util: check unified_cache before invoking streq()
[thirdparty/systemd.git] / src / basic / cgroup-util.c
CommitLineData
8c6db833
LP
1/***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
8c6db833
LP
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 14 Lesser General Public License for more details.
8c6db833 15
5430f7f2 16 You should have received a copy of the GNU Lesser General Public License
8c6db833
LP
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
84ac7bea 20#include <dirent.h>
8c6db833 21#include <errno.h>
84ac7bea 22#include <ftw.h>
11c3a366 23#include <limits.h>
8c6db833 24#include <signal.h>
11c3a366 25#include <stddef.h>
8c6db833 26#include <stdlib.h>
84ac7bea 27#include <string.h>
672c48cc 28#include <sys/stat.h>
11c3a366 29#include <sys/statfs.h>
672c48cc 30#include <sys/types.h>
4b58153d 31#include <sys/xattr.h>
84ac7bea 32#include <unistd.h>
8c6db833 33
b5efdb8a 34#include "alloc-util.h"
3ffd4af2 35#include "cgroup-util.h"
93cc7779 36#include "def.h"
a0956174 37#include "dirent-util.h"
84ac7bea 38#include "extract-word.h"
3ffd4af2 39#include "fd-util.h"
84ac7bea 40#include "fileio.h"
f97b34a6 41#include "format-util.h"
f4f15635 42#include "fs-util.h"
93cc7779 43#include "log.h"
84ac7bea
LP
44#include "login-util.h"
45#include "macro.h"
93cc7779 46#include "missing.h"
84ac7bea 47#include "mkdir.h"
6bedfcbb 48#include "parse-util.h"
9eb977db 49#include "path-util.h"
872a590e 50#include "proc-cmdline.h"
84ac7bea
LP
51#include "process-util.h"
52#include "set.h"
9444b1f2 53#include "special.h"
872a590e 54#include "stat-util.h"
d054f0a4 55#include "stdio-util.h"
8b43440b 56#include "string-table.h"
07630cea 57#include "string-util.h"
84ac7bea 58#include "unit-name.h"
b1d4f8e1 59#include "user-util.h"
8c6db833 60
c6c18be3 61int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
7027ff61 62 _cleanup_free_ char *fs = NULL;
c6c18be3 63 FILE *f;
7027ff61 64 int r;
c6c18be3 65
c6c18be3
LP
66 assert(_f);
67
c3175a7f
LP
68 r = cg_get_path(controller, path, "cgroup.procs", &fs);
69 if (r < 0)
c6c18be3
LP
70 return r;
71
72 f = fopen(fs, "re");
c6c18be3
LP
73 if (!f)
74 return -errno;
75
76 *_f = f;
77 return 0;
78}
79
c6c18be3
LP
/* Reads the next PID from a stream opened on a process list.
 *
 * Returns 1 and stores the PID in *_pid if one was read, 0 at end of file, and a negative
 * errno-style value on failure (-EIO if the input is not a number, or if it is zero).
 *
 * Note that cgroup.procs might contain duplicates! See cgroups.txt for details. */
int cg_read_pid(FILE *f, pid_t *_pid) {
        unsigned long value;

        assert(f);
        assert(_pid);

        /* Reset errno so that a stale value is not mistaken for a read error below. */
        errno = 0;
        if (fscanf(f, "%lu", &value) == 1) {
                /* The value is unsigned, hence zero is the only non-positive case. */
                if (value == 0)
                        return -EIO;

                *_pid = (pid_t) value;
                return 1;
        }

        if (feof(f))
                return 0;

        return errno > 0 ? -errno : -EIO;
}
104
ab2c3861
TH
105int cg_read_event(const char *controller, const char *path, const char *event,
106 char **val)
107{
108 _cleanup_free_ char *events = NULL, *content = NULL;
109 char *p, *line;
110 int r;
111
112 r = cg_get_path(controller, path, "cgroup.events", &events);
113 if (r < 0)
114 return r;
115
116 r = read_full_file(events, &content, NULL);
117 if (r < 0)
118 return r;
119
120 p = content;
121 while ((line = strsep(&p, "\n"))) {
122 char *key;
123
124 key = strsep(&line, " ");
125 if (!key || !line)
126 return -EINVAL;
127
128 if (strcmp(key, event))
129 continue;
130
131 *val = strdup(line);
132 return 0;
133 }
134
135 return -ENOENT;
136}
137
3228995c
CB
/* Reports whether "/proc/self/ns/cgroup" exists. The answer is determined on first use and cached
 * in a thread-local variable afterwards. */
bool cg_ns_supported(void) {
        static thread_local int enabled = -1;

        /* A negative value means "not probed yet". */
        if (enabled < 0)
                enabled = access("/proc/self/ns/cgroup", F_OK) == 0 ? 1 : 0;

        return enabled;
}
151
35d2e7ec 152int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
7027ff61 153 _cleanup_free_ char *fs = NULL;
35d2e7ec
LP
154 int r;
155 DIR *d;
156
35d2e7ec
LP
157 assert(_d);
158
159 /* This is not recursive! */
160
c3175a7f
LP
161 r = cg_get_path(controller, path, NULL, &fs);
162 if (r < 0)
35d2e7ec
LP
163 return r;
164
165 d = opendir(fs);
35d2e7ec
LP
166 if (!d)
167 return -errno;
168
169 *_d = d;
170 return 0;
171}
172
173int cg_read_subgroup(DIR *d, char **fn) {
174 struct dirent *de;
175
176 assert(d);
7027ff61 177 assert(fn);
35d2e7ec 178
f01327ad 179 FOREACH_DIRENT_ALL(de, d, return -errno) {
35d2e7ec
LP
180 char *b;
181
182 if (de->d_type != DT_DIR)
183 continue;
184
49bfc877 185 if (dot_or_dot_dot(de->d_name))
35d2e7ec
LP
186 continue;
187
7027ff61
LP
188 b = strdup(de->d_name);
189 if (!b)
35d2e7ec
LP
190 return -ENOMEM;
191
192 *fn = b;
193 return 1;
194 }
195
35d2e7ec
LP
196 return 0;
197}
198
4ad49000 199int cg_rmdir(const char *controller, const char *path) {
7027ff61 200 _cleanup_free_ char *p = NULL;
35d2e7ec
LP
201 int r;
202
ad293f5a
LP
203 r = cg_get_path(controller, path, NULL, &p);
204 if (r < 0)
35d2e7ec
LP
205 return r;
206
207 r = rmdir(p);
7027ff61
LP
208 if (r < 0 && errno != ENOENT)
209 return -errno;
35d2e7ec 210
2977724b
TH
211 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
212 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
213 if (r < 0)
214 log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
215 }
216
7027ff61 217 return 0;
35d2e7ec
LP
218}
219
1d98fef1
LP
220int cg_kill(
221 const char *controller,
222 const char *path,
223 int sig,
224 CGroupFlags flags,
225 Set *s,
226 cg_kill_log_func_t log_kill,
227 void *userdata) {
228
7027ff61 229 _cleanup_set_free_ Set *allocated_set = NULL;
35d2e7ec 230 bool done = false;
8c6db833 231 int r, ret = 0;
35d2e7ec 232 pid_t my_pid;
8c6db833 233
8c6db833
LP
234 assert(sig >= 0);
235
0d5b4810
LP
236 /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
237 * SIGCONT on SIGKILL. */
238 if (IN_SET(sig, SIGCONT, SIGKILL))
239 flags &= ~CGROUP_SIGCONT;
240
8c6db833
LP
241 /* This goes through the tasks list and kills them all. This
242 * is repeated until no further processes are added to the
243 * tasks list, to properly handle forking processes */
244
7027ff61 245 if (!s) {
d5099efc 246 s = allocated_set = set_new(NULL);
7027ff61 247 if (!s)
ca949c9d 248 return -ENOMEM;
7027ff61 249 }
8c6db833
LP
250
251 my_pid = getpid();
252
253 do {
7027ff61 254 _cleanup_fclose_ FILE *f = NULL;
0b172489 255 pid_t pid = 0;
8c6db833
LP
256 done = true;
257
7027ff61
LP
258 r = cg_enumerate_processes(controller, path, &f);
259 if (r < 0) {
4c633005 260 if (ret >= 0 && r != -ENOENT)
7027ff61 261 return r;
35d2e7ec 262
7027ff61 263 return ret;
35d2e7ec 264 }
c6c18be3
LP
265
266 while ((r = cg_read_pid(f, &pid)) > 0) {
8c6db833 267
1d98fef1 268 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
c6c18be3 269 continue;
8c6db833 270
fea72cc0 271 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
c6c18be3 272 continue;
8c6db833 273
1d98fef1
LP
274 if (log_kill)
275 log_kill(pid, sig, userdata);
276
8c6db833
LP
277 /* If we haven't killed this process yet, kill
278 * it */
4c633005
LP
279 if (kill(pid, sig) < 0) {
280 if (ret >= 0 && errno != ESRCH)
8c6db833 281 ret = -errno;
6e8314c4 282 } else {
1d98fef1 283 if (flags & CGROUP_SIGCONT)
e155a0aa 284 (void) kill(pid, SIGCONT);
430c18ed 285
6e8314c4
LP
286 if (ret == 0)
287 ret = 1;
430c18ed 288 }
8c6db833 289
8c6db833
LP
290 done = false;
291
fea72cc0 292 r = set_put(s, PID_TO_PTR(pid));
7027ff61 293 if (r < 0) {
35d2e7ec 294 if (ret >= 0)
7027ff61 295 return r;
35d2e7ec 296
7027ff61 297 return ret;
35d2e7ec
LP
298 }
299 }
300
301 if (r < 0) {
302 if (ret >= 0)
7027ff61 303 return r;
35d2e7ec 304
7027ff61 305 return ret;
8c6db833
LP
306 }
307
8c6db833
LP
308 /* To avoid racing against processes which fork
309 * quicker than we can kill them we repeat this until
310 * no new pids need to be killed. */
311
35d2e7ec 312 } while (!done);
8c6db833 313
35d2e7ec 314 return ret;
8c6db833
LP
315}
316
1d98fef1
LP
317int cg_kill_recursive(
318 const char *controller,
319 const char *path,
320 int sig,
321 CGroupFlags flags,
322 Set *s,
323 cg_kill_log_func_t log_kill,
324 void *userdata) {
325
7027ff61
LP
326 _cleanup_set_free_ Set *allocated_set = NULL;
327 _cleanup_closedir_ DIR *d = NULL;
e155a0aa 328 int r, ret;
35d2e7ec 329 char *fn;
8c6db833
LP
330
331 assert(path);
8c6db833
LP
332 assert(sig >= 0);
333
7027ff61 334 if (!s) {
d5099efc 335 s = allocated_set = set_new(NULL);
7027ff61 336 if (!s)
ca949c9d 337 return -ENOMEM;
7027ff61 338 }
ca949c9d 339
1d98fef1 340 ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
8c6db833 341
7027ff61
LP
342 r = cg_enumerate_subgroups(controller, path, &d);
343 if (r < 0) {
4c633005 344 if (ret >= 0 && r != -ENOENT)
7027ff61 345 return r;
8c6db833 346
7027ff61 347 return ret;
35d2e7ec 348 }
8c6db833 349
35d2e7ec 350 while ((r = cg_read_subgroup(d, &fn)) > 0) {
7027ff61 351 _cleanup_free_ char *p = NULL;
8c6db833 352
605405c6 353 p = strjoin(path, "/", fn);
35d2e7ec 354 free(fn);
7027ff61
LP
355 if (!p)
356 return -ENOMEM;
8c6db833 357
1d98fef1 358 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
e155a0aa 359 if (r != 0 && ret >= 0)
35d2e7ec 360 ret = r;
8c6db833 361 }
7027ff61 362 if (ret >= 0 && r < 0)
35d2e7ec
LP
363 ret = r;
364
1d98fef1 365 if (flags & CGROUP_REMOVE) {
4ad49000 366 r = cg_rmdir(controller, path);
7027ff61
LP
367 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
368 return r;
369 }
ca949c9d 370
8c6db833
LP
371 return ret;
372}
373
1d98fef1
LP
374int cg_migrate(
375 const char *cfrom,
376 const char *pfrom,
377 const char *cto,
378 const char *pto,
379 CGroupFlags flags) {
380
35d2e7ec 381 bool done = false;
246aa6dd 382 _cleanup_set_free_ Set *s = NULL;
8c6db833
LP
383 int r, ret = 0;
384 pid_t my_pid;
385
246aa6dd
LP
386 assert(cfrom);
387 assert(pfrom);
388 assert(cto);
389 assert(pto);
8c6db833 390
d5099efc 391 s = set_new(NULL);
246aa6dd 392 if (!s)
35d2e7ec
LP
393 return -ENOMEM;
394
8c6db833
LP
395 my_pid = getpid();
396
397 do {
7027ff61 398 _cleanup_fclose_ FILE *f = NULL;
0b172489 399 pid_t pid = 0;
8c6db833
LP
400 done = true;
401
b043cd0b 402 r = cg_enumerate_processes(cfrom, pfrom, &f);
246aa6dd 403 if (r < 0) {
4c633005 404 if (ret >= 0 && r != -ENOENT)
7027ff61 405 return r;
35d2e7ec 406
246aa6dd 407 return ret;
35d2e7ec 408 }
c6c18be3
LP
409
410 while ((r = cg_read_pid(f, &pid)) > 0) {
8c6db833 411
35d2e7ec
LP
412 /* This might do weird stuff if we aren't a
413 * single-threaded program. However, we
414 * luckily know we are not */
1d98fef1 415 if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
c6c18be3 416 continue;
8c6db833 417
fea72cc0 418 if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
35d2e7ec
LP
419 continue;
420
9b84c7f9
LP
421 /* Ignore kernel threads. Since they can only
422 * exist in the root cgroup, we only check for
423 * them there. */
424 if (cfrom &&
425 (isempty(pfrom) || path_equal(pfrom, "/")) &&
426 is_kernel_thread(pid) > 0)
427 continue;
428
246aa6dd
LP
429 r = cg_attach(cto, pto, pid);
430 if (r < 0) {
4c633005 431 if (ret >= 0 && r != -ESRCH)
35d2e7ec
LP
432 ret = r;
433 } else if (ret == 0)
434 ret = 1;
8c6db833 435
8c6db833 436 done = false;
35d2e7ec 437
fea72cc0 438 r = set_put(s, PID_TO_PTR(pid));
246aa6dd 439 if (r < 0) {
35d2e7ec 440 if (ret >= 0)
7027ff61 441 return r;
35d2e7ec 442
246aa6dd 443 return ret;
35d2e7ec
LP
444 }
445 }
446
447 if (r < 0) {
448 if (ret >= 0)
7027ff61 449 return r;
35d2e7ec 450
246aa6dd 451 return ret;
8c6db833 452 }
35d2e7ec 453 } while (!done);
8c6db833 454
35d2e7ec 455 return ret;
8c6db833
LP
456}
457
4ad49000
LP
458int cg_migrate_recursive(
459 const char *cfrom,
460 const char *pfrom,
461 const char *cto,
462 const char *pto,
1d98fef1 463 CGroupFlags flags) {
4ad49000 464
246aa6dd 465 _cleanup_closedir_ DIR *d = NULL;
7027ff61 466 int r, ret = 0;
35d2e7ec 467 char *fn;
8c6db833 468
246aa6dd
LP
469 assert(cfrom);
470 assert(pfrom);
471 assert(cto);
472 assert(pto);
8c6db833 473
1d98fef1 474 ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
8c6db833 475
246aa6dd
LP
476 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
477 if (r < 0) {
4c633005 478 if (ret >= 0 && r != -ENOENT)
7027ff61
LP
479 return r;
480
246aa6dd 481 return ret;
35d2e7ec
LP
482 }
483
484 while ((r = cg_read_subgroup(d, &fn)) > 0) {
246aa6dd 485 _cleanup_free_ char *p = NULL;
8c6db833 486
605405c6 487 p = strjoin(pfrom, "/", fn);
35d2e7ec 488 free(fn);
e155a0aa
LP
489 if (!p)
490 return -ENOMEM;
8c6db833 491
1d98fef1 492 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
35d2e7ec
LP
493 if (r != 0 && ret >= 0)
494 ret = r;
8c6db833
LP
495 }
496
35d2e7ec
LP
497 if (r < 0 && ret >= 0)
498 ret = r;
499
1d98fef1 500 if (flags & CGROUP_REMOVE) {
4ad49000 501 r = cg_rmdir(cfrom, pfrom);
246aa6dd
LP
502 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
503 return r;
504 }
8c6db833
LP
505
506 return ret;
507}
508
13b84ec7
LP
509int cg_migrate_recursive_fallback(
510 const char *cfrom,
511 const char *pfrom,
512 const char *cto,
513 const char *pto,
1d98fef1 514 CGroupFlags flags) {
13b84ec7
LP
515
516 int r;
517
518 assert(cfrom);
519 assert(pfrom);
520 assert(cto);
521 assert(pto);
522
1d98fef1 523 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
13b84ec7
LP
524 if (r < 0) {
525 char prefix[strlen(pto) + 1];
526
527 /* This didn't work? Then let's try all prefixes of the destination */
528
fecffe5d 529 PATH_FOREACH_PREFIX(prefix, pto) {
e155a0aa
LP
530 int q;
531
1d98fef1 532 q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
e155a0aa
LP
533 if (q >= 0)
534 return q;
13b84ec7
LP
535 }
536 }
537
e155a0aa 538 return r;
13b84ec7
LP
539}
540
efdb0237
LP
541static const char *controller_to_dirname(const char *controller) {
542 const char *e;
3474ae3c 543
7027ff61
LP
544 assert(controller);
545
efdb0237
LP
546 /* Converts a controller name to the directory name below
547 * /sys/fs/cgroup/ we want to mount it to. Effectively, this
548 * just cuts off the name= prefixed used for named
549 * hierarchies, if it is specified. */
550
2977724b
TH
551 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
552 if (cg_hybrid_unified())
553 controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
554 else
555 controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
556 }
b6629c4b 557
efdb0237
LP
558 e = startswith(controller, "name=");
559 if (e)
560 return e;
561
562 return controller;
3474ae3c
LP
563}
564
569b19d8
LP
565static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
566 const char *dn;
018ef268 567 char *t = NULL;
3474ae3c 568
efdb0237 569 assert(fs);
569b19d8
LP
570 assert(controller);
571
572 dn = controller_to_dirname(controller);
efdb0237
LP
573
574 if (isempty(path) && isempty(suffix))
569b19d8 575 t = strappend("/sys/fs/cgroup/", dn);
efdb0237 576 else if (isempty(path))
605405c6 577 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
efdb0237 578 else if (isempty(suffix))
605405c6 579 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
efdb0237 580 else
605405c6 581 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
efdb0237
LP
582 if (!t)
583 return -ENOMEM;
3474ae3c 584
efdb0237
LP
585 *fs = t;
586 return 0;
587}
588
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]" for the unified hierarchy. Empty or NULL components
 * are left out.
 *
 * Stores the newly allocated string in *fs and returns 0, or returns -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **fs) {
        char *joined;

        assert(fs);

        if (isempty(path)) {
                if (isempty(suffix))
                        joined = strdup("/sys/fs/cgroup");
                else
                        joined = strappend("/sys/fs/cgroup/", suffix);
        } else {
                if (isempty(suffix))
                        joined = strappend("/sys/fs/cgroup/", path);
                else
                        joined = strjoin("/sys/fs/cgroup/", path, "/", suffix);
        }
        if (!joined)
                return -ENOMEM;

        *fs = joined;
        return 0;
}
608
/* Computes the file system path for a cgroup, optionally with an attribute name as suffix.
 *
 * If a controller is given it must pass cg_controller_is_valid(); the path is then built below
 * /sys/fs/cgroup, either in the unified or the legacy layout depending on cg_all_unified(). If
 * controller is NULL, the path *below* the controllers is returned, without any prefix; in that
 * case at least one of path and suffix must be set.
 *
 * The result is passed through path_kill_slashes() and stored in *fs as a newly allocated string.
 * Returns 0 on success, -EINVAL on invalid arguments, -ENOMEM on allocation failure. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        char *t;
        int r;

        assert(fs);

        if (controller) {
                if (!cg_controller_is_valid(controller))
                        return -EINVAL;

                r = cg_all_unified() ? join_path_unified(path, suffix, fs)
                                     : join_path_legacy(controller, path, suffix, fs);
                if (r < 0)
                        return r;

                path_kill_slashes(*fs);
                return 0;
        }

        /* No controller: only concatenate what we were given. */
        if (!path && !suffix)
                return -EINVAL;

        if (!suffix)
                t = strdup(path);
        else if (!path)
                t = strdup(suffix);
        else
                t = strjoin(path, "/", suffix);
        if (!t)
                return -ENOMEM;

        *fs = path_kill_slashes(t);
        return 0;
}
dbd821ac 649
efdb0237 650static int controller_is_accessible(const char *controller) {
37099707 651
efdb0237 652 assert(controller);
37099707 653
efdb0237
LP
654 /* Checks whether a specific controller is accessible,
655 * i.e. its hierarchy mounted. In the unified hierarchy all
656 * controllers are considered accessible, except for the named
657 * hierarchies */
b12afc8c 658
efdb0237
LP
659 if (!cg_controller_is_valid(controller))
660 return -EINVAL;
661
415fc41c 662 if (cg_all_unified()) {
efdb0237
LP
663 /* We don't support named hierarchies if we are using
664 * the unified hierarchy. */
665
666 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
667 return 0;
668
669 if (startswith(controller, "name="))
670 return -EOPNOTSUPP;
671
672 } else {
673 const char *cc, *dn;
674
675 dn = controller_to_dirname(controller);
676 cc = strjoina("/sys/fs/cgroup/", dn);
677
678 if (laccess(cc, F_OK) < 0)
679 return -errno;
680 }
37099707
LP
681
682 return 0;
683}
684
/* Same as cg_get_path(), but first verifies via controller_is_accessible() that the specified
 * controller is actually accessible. Unlike cg_get_path(), the controller must not be NULL. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
        int r;

        assert(controller);
        assert(fs);

        r = controller_is_accessible(controller);
        if (r < 0)
                return r;

        return cg_get_path(controller, path, suffix, fs);
}
698
e27796a0 699static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
4ad49000
LP
700 assert(path);
701 assert(sb);
702 assert(ftwbuf);
e27796a0
LP
703
704 if (typeflag != FTW_DP)
705 return 0;
706
707 if (ftwbuf->level < 1)
708 return 0;
709
e155a0aa 710 (void) rmdir(path);
e27796a0
LP
711 return 0;
712}
713
8c6db833 714int cg_trim(const char *controller, const char *path, bool delete_root) {
7027ff61 715 _cleanup_free_ char *fs = NULL;
2977724b 716 int r = 0, q;
8c6db833 717
8c6db833
LP
718 assert(path);
719
e27796a0
LP
720 r = cg_get_path(controller, path, NULL, &fs);
721 if (r < 0)
8c6db833
LP
722 return r;
723
e27796a0 724 errno = 0;
e155a0aa
LP
725 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
726 if (errno == ENOENT)
727 r = 0;
b3267152 728 else if (errno > 0)
e155a0aa
LP
729 r = -errno;
730 else
731 r = -EIO;
732 }
e27796a0
LP
733
734 if (delete_root) {
4ad49000
LP
735 if (rmdir(fs) < 0 && errno != ENOENT)
736 return -errno;
e27796a0
LP
737 }
738
2977724b
TH
739 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
740 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
741 if (q < 0)
742 log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
743 }
744
e27796a0 745 return r;
8c6db833
LP
746}
747
1434ae6f
LP
748int cg_create(const char *controller, const char *path) {
749 _cleanup_free_ char *fs = NULL;
750 int r;
751
752 r = cg_get_path_and_check(controller, path, NULL, &fs);
753 if (r < 0)
754 return r;
755
756 r = mkdir_parents(fs, 0755);
757 if (r < 0)
758 return r;
759
760 if (mkdir(fs, 0755) < 0) {
761
762 if (errno == EEXIST)
763 return 0;
764
765 return -errno;
766 }
767
2977724b
TH
768 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
769 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
770 if (r < 0)
771 log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
772 }
773
1434ae6f
LP
774 return 1;
775}
776
/* Creates the specified cgroup and attaches the process pid to it.
 *
 * On success the result of cg_create() is passed through (1 if the group was created, 0 if it
 * already existed). On failure a negative errno-style value is returned; this does not remove the
 * cgroup on failure. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, r;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        r = cg_attach(controller, path, pid);
        if (r < 0)
                return r;

        return created;
}
793
8c6db833 794int cg_attach(const char *controller, const char *path, pid_t pid) {
574d5f2d
LP
795 _cleanup_free_ char *fs = NULL;
796 char c[DECIMAL_STR_MAX(pid_t) + 2];
8c6db833
LP
797 int r;
798
8c6db833
LP
799 assert(path);
800 assert(pid >= 0);
801
b043cd0b 802 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
3474ae3c 803 if (r < 0)
c6c18be3 804 return r;
8c6db833
LP
805
806 if (pid == 0)
807 pid = getpid();
808
d054f0a4 809 xsprintf(c, PID_FMT "\n", pid);
8c6db833 810
2977724b
TH
811 r = write_string_file(fs, c, 0);
812 if (r < 0)
813 return r;
814
815 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
816 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
817 if (r < 0)
818 log_warning_errno(r, "Failed to attach %d to compat systemd cgroup %s: %m", pid, path);
819 }
820
821 return 0;
8c6db833
LP
822}
823
13b84ec7
LP
824int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
825 int r;
826
827 assert(controller);
828 assert(path);
829 assert(pid >= 0);
830
831 r = cg_attach(controller, path, pid);
832 if (r < 0) {
833 char prefix[strlen(path) + 1];
834
835 /* This didn't work? Then let's try all prefixes of
836 * the destination */
837
fecffe5d 838 PATH_FOREACH_PREFIX(prefix, path) {
e155a0aa
LP
839 int q;
840
841 q = cg_attach(controller, prefix, pid);
842 if (q >= 0)
843 return q;
13b84ec7
LP
844 }
845 }
846
e155a0aa 847 return r;
13b84ec7
LP
848}
849
2d76d14e
LP
850int cg_set_group_access(
851 const char *controller,
852 const char *path,
853 mode_t mode,
854 uid_t uid,
855 gid_t gid) {
856
574d5f2d 857 _cleanup_free_ char *fs = NULL;
8c6db833
LP
858 int r;
859
e155a0aa
LP
860 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
861 return 0;
8c6db833 862
fed1e721 863 if (mode != MODE_INVALID)
8d53b453
LP
864 mode &= 0777;
865
866 r = cg_get_path(controller, path, NULL, &fs);
867 if (r < 0)
8c6db833
LP
868 return r;
869
2977724b
TH
870 r = chmod_and_chown(fs, mode, uid, gid);
871 if (r < 0)
872 return r;
873
874 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
875 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
876 if (r < 0)
877 log_warning_errno(r, "Failed to set group access on compat systemd cgroup %s: %m", path);
878 }
879
880 return 0;
8c6db833
LP
881}
882
974efc46
LP
883int cg_set_task_access(
884 const char *controller,
885 const char *path,
886 mode_t mode,
887 uid_t uid,
4ad49000 888 gid_t gid) {
974efc46
LP
889
890 _cleanup_free_ char *fs = NULL, *procs = NULL;
415fc41c 891 int r;
8c6db833 892
8c6db833
LP
893 assert(path);
894
fed1e721 895 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
8d53b453
LP
896 return 0;
897
fed1e721 898 if (mode != MODE_INVALID)
8d53b453
LP
899 mode &= 0666;
900
b043cd0b 901 r = cg_get_path(controller, path, "cgroup.procs", &fs);
8d53b453 902 if (r < 0)
8c6db833
LP
903 return r;
904
905 r = chmod_and_chown(fs, mode, uid, gid);
974efc46
LP
906 if (r < 0)
907 return r;
8c6db833 908
2977724b
TH
909 if (!cg_unified(controller)) {
910 /* Compatibility, Always keep values for "tasks" in sync with
911 * "cgroup.procs" */
912 if (cg_get_path(controller, path, "tasks", &procs) >= 0)
913 (void) chmod_and_chown(procs, mode, uid, gid);
914 }
efdb0237 915
2977724b
TH
916 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
917 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
918 if (r < 0)
919 log_warning_errno(r, "Failed to set task access on compat systemd cgroup %s: %m", path);
920 }
974efc46 921
efdb0237 922 return 0;
8c6db833
LP
923}
924
4b58153d
LP
925int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
926 _cleanup_free_ char *fs = NULL;
927 int r;
928
929 assert(path);
930 assert(name);
931 assert(value || size <= 0);
932
933 r = cg_get_path(controller, path, NULL, &fs);
934 if (r < 0)
935 return r;
936
937 if (setxattr(fs, name, value, size, flags) < 0)
938 return -errno;
939
940 return 0;
941}
942
943int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
944 _cleanup_free_ char *fs = NULL;
945 ssize_t n;
946 int r;
947
948 assert(path);
949 assert(name);
950
951 r = cg_get_path(controller, path, NULL, &fs);
952 if (r < 0)
953 return r;
954
955 n = getxattr(fs, name, value, size);
956 if (n < 0)
957 return -errno;
958
959 return (int) n;
960}
961
7027ff61 962int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
7027ff61
LP
963 _cleanup_fclose_ FILE *f = NULL;
964 char line[LINE_MAX];
b6629c4b 965 const char *fs, *controller_str;
efdb0237 966 size_t cs = 0;
415fc41c 967 bool unified;
8c6db833 968
8c6db833 969 assert(path);
c6c18be3 970 assert(pid >= 0);
8c6db833 971
5da38d07
TH
972 if (controller) {
973 if (!cg_controller_is_valid(controller))
974 return -EINVAL;
975 } else
976 controller = SYSTEMD_CGROUP_CONTROLLER;
977
978 unified = cg_unified(controller);
b6629c4b
TH
979 if (!unified) {
980 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
981 controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
982 else
983 controller_str = controller;
984
985 cs = strlen(controller_str);
986 }
7027ff61 987
b68fa010 988 fs = procfs_file_alloca(pid, "cgroup");
c6c18be3 989 f = fopen(fs, "re");
4c633005
LP
990 if (!f)
991 return errno == ENOENT ? -ESRCH : -errno;
992
7027ff61 993 FOREACH_LINE(line, f, return -errno) {
efdb0237 994 char *e, *p;
c6c18be3
LP
995
996 truncate_nl(line);
997
efdb0237
LP
998 if (unified) {
999 e = startswith(line, "0:");
1000 if (!e)
1001 continue;
c6c18be3 1002
efdb0237
LP
1003 e = strchr(e, ':');
1004 if (!e)
1005 continue;
1006 } else {
1007 char *l;
1008 size_t k;
1009 const char *word, *state;
1010 bool found = false;
1011
1012 l = strchr(line, ':');
1013 if (!l)
1014 continue;
8af8afd6 1015
efdb0237
LP
1016 l++;
1017 e = strchr(l, ':');
1018 if (!e)
1019 continue;
8af8afd6 1020
efdb0237
LP
1021 *e = 0;
1022 FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
b6629c4b 1023 if (k == cs && memcmp(word, controller_str, cs) == 0) {
efdb0237
LP
1024 found = true;
1025 break;
1026 }
8af8afd6
LP
1027 }
1028
efdb0237
LP
1029 if (!found)
1030 continue;
8af8afd6
LP
1031 }
1032
8af8afd6 1033 p = strdup(e + 1);
7027ff61
LP
1034 if (!p)
1035 return -ENOMEM;
c6c18be3
LP
1036
1037 *path = p;
7027ff61 1038 return 0;
c6c18be3
LP
1039 }
1040
1c80e425 1041 return -ENODATA;
8c6db833
LP
1042}
1043
1044int cg_install_release_agent(const char *controller, const char *agent) {
7027ff61 1045 _cleanup_free_ char *fs = NULL, *contents = NULL;
efdb0237 1046 const char *sc;
415fc41c 1047 int r;
8c6db833 1048
8c6db833
LP
1049 assert(agent);
1050
415fc41c 1051 if (cg_unified(controller)) /* doesn't apply to unified hierarchy */
efdb0237
LP
1052 return -EOPNOTSUPP;
1053
7027ff61
LP
1054 r = cg_get_path(controller, NULL, "release_agent", &fs);
1055 if (r < 0)
c6c18be3 1056 return r;
8c6db833 1057
7027ff61
LP
1058 r = read_one_line_file(fs, &contents);
1059 if (r < 0)
1060 return r;
8c6db833
LP
1061
1062 sc = strstrip(contents);
e155a0aa 1063 if (isempty(sc)) {
4c1fc3e4 1064 r = write_string_file(fs, agent, 0);
574d5f2d 1065 if (r < 0)
7027ff61 1066 return r;
b8725df8 1067 } else if (!path_equal(sc, agent))
7027ff61 1068 return -EEXIST;
8c6db833 1069
0da16248 1070 fs = mfree(fs);
7027ff61
LP
1071 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1072 if (r < 0)
1073 return r;
8c6db833 1074
0da16248 1075 contents = mfree(contents);
7027ff61
LP
1076 r = read_one_line_file(fs, &contents);
1077 if (r < 0)
1078 return r;
8c6db833
LP
1079
1080 sc = strstrip(contents);
8c6db833 1081 if (streq(sc, "0")) {
4c1fc3e4 1082 r = write_string_file(fs, "1", 0);
7027ff61
LP
1083 if (r < 0)
1084 return r;
c6c18be3 1085
7027ff61
LP
1086 return 1;
1087 }
8c6db833 1088
7027ff61
LP
1089 if (!streq(sc, "1"))
1090 return -EIO;
8c6db833 1091
7027ff61 1092 return 0;
8c6db833
LP
1093}
1094
ad929bcc
KS
1095int cg_uninstall_release_agent(const char *controller) {
1096 _cleanup_free_ char *fs = NULL;
415fc41c 1097 int r;
efdb0237 1098
415fc41c 1099 if (cg_unified(controller)) /* Doesn't apply to unified hierarchy */
efdb0237 1100 return -EOPNOTSUPP;
ad929bcc 1101
ac9ef333
LP
1102 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1103 if (r < 0)
1104 return r;
1105
4c1fc3e4 1106 r = write_string_file(fs, "0", 0);
ac9ef333
LP
1107 if (r < 0)
1108 return r;
1109
0da16248 1110 fs = mfree(fs);
ac9ef333 1111
ad929bcc
KS
1112 r = cg_get_path(controller, NULL, "release_agent", &fs);
1113 if (r < 0)
1114 return r;
1115
4c1fc3e4 1116 r = write_string_file(fs, "", 0);
ad929bcc
KS
1117 if (r < 0)
1118 return r;
1119
ac9ef333 1120 return 0;
ad929bcc
KS
1121}
1122
6f883237 1123int cg_is_empty(const char *controller, const char *path) {
7027ff61 1124 _cleanup_fclose_ FILE *f = NULL;
efdb0237 1125 pid_t pid;
7027ff61 1126 int r;
8c6db833 1127
8c6db833
LP
1128 assert(path);
1129
b043cd0b 1130 r = cg_enumerate_processes(controller, path, &f);
6f883237
LP
1131 if (r == -ENOENT)
1132 return 1;
c3175a7f 1133 if (r < 0)
6f883237 1134 return r;
8c6db833 1135
6f883237 1136 r = cg_read_pid(f, &pid);
c6c18be3
LP
1137 if (r < 0)
1138 return r;
8c6db833 1139
6f883237 1140 return r == 0;
8c6db833
LP
1141}
1142
6f883237 1143int cg_is_empty_recursive(const char *controller, const char *path) {
415fc41c 1144 int r;
8c6db833 1145
8c6db833
LP
1146 assert(path);
1147
6fd66507
LP
1148 /* The root cgroup is always populated */
1149 if (controller && (isempty(path) || path_equal(path, "/")))
efdb0237 1150 return false;
6fd66507 1151
415fc41c 1152 if (cg_unified(controller)) {
ab2c3861 1153 _cleanup_free_ char *t = NULL;
8c6db833 1154
efdb0237 1155 /* On the unified hierarchy we can check empty state
ab2c3861 1156 * via the "populated" attribute of "cgroup.events". */
8c6db833 1157
ab2c3861 1158 r = cg_read_event(controller, path, "populated", &t);
efdb0237
LP
1159 if (r < 0)
1160 return r;
1161
1162 return streq(t, "0");
1163 } else {
1164 _cleanup_closedir_ DIR *d = NULL;
1165 char *fn;
8c6db833 1166
efdb0237 1167 r = cg_is_empty(controller, path);
35d2e7ec 1168 if (r <= 0)
7027ff61 1169 return r;
35d2e7ec 1170
efdb0237
LP
1171 r = cg_enumerate_subgroups(controller, path, &d);
1172 if (r == -ENOENT)
1173 return 1;
1174 if (r < 0)
1175 return r;
35d2e7ec 1176
efdb0237
LP
1177 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1178 _cleanup_free_ char *p = NULL;
1179
605405c6 1180 p = strjoin(path, "/", fn);
efdb0237
LP
1181 free(fn);
1182 if (!p)
1183 return -ENOMEM;
1184
1185 r = cg_is_empty_recursive(controller, p);
1186 if (r <= 0)
1187 return r;
1188 }
1189 if (r < 0)
1190 return r;
1191
1192 return true;
1193 }
35d2e7ec
LP
1194}
1195
1196int cg_split_spec(const char *spec, char **controller, char **path) {
35d2e7ec 1197 char *t = NULL, *u = NULL;
efdb0237 1198 const char *e;
35d2e7ec
LP
1199
1200 assert(spec);
35d2e7ec
LP
1201
1202 if (*spec == '/') {
e884315e
LP
1203 if (!path_is_safe(spec))
1204 return -EINVAL;
35d2e7ec
LP
1205
1206 if (path) {
246aa6dd
LP
1207 t = strdup(spec);
1208 if (!t)
35d2e7ec
LP
1209 return -ENOMEM;
1210
dbb9401d 1211 *path = path_kill_slashes(t);
8c6db833
LP
1212 }
1213
35d2e7ec
LP
1214 if (controller)
1215 *controller = NULL;
1216
1217 return 0;
8c6db833
LP
1218 }
1219
246aa6dd
LP
1220 e = strchr(spec, ':');
1221 if (!e) {
185a0874 1222 if (!cg_controller_is_valid(spec))
35d2e7ec
LP
1223 return -EINVAL;
1224
1225 if (controller) {
efdb0237 1226 t = strdup(spec);
246aa6dd 1227 if (!t)
35d2e7ec
LP
1228 return -ENOMEM;
1229
1230 *controller = t;
1231 }
1232
1233 if (path)
1234 *path = NULL;
1235
1236 return 0;
8c6db833
LP
1237 }
1238
efdb0237 1239 t = strndup(spec, e-spec);
e884315e
LP
1240 if (!t)
1241 return -ENOMEM;
185a0874 1242 if (!cg_controller_is_valid(t)) {
e884315e 1243 free(t);
35d2e7ec 1244 return -EINVAL;
246aa6dd
LP
1245 }
1246
efdb0237
LP
1247 if (isempty(e+1))
1248 u = NULL;
1249 else {
baa89da4
LP
1250 u = strdup(e+1);
1251 if (!u) {
1252 free(t);
1253 return -ENOMEM;
1254 }
35d2e7ec 1255
baa89da4
LP
1256 if (!path_is_safe(u) ||
1257 !path_is_absolute(u)) {
1258 free(t);
1259 free(u);
1260 return -EINVAL;
1261 }
1262
1263 path_kill_slashes(u);
1264 }
5954c074 1265
35d2e7ec
LP
1266 if (controller)
1267 *controller = t;
e884315e
LP
1268 else
1269 free(t);
35d2e7ec
LP
1270
1271 if (path)
1272 *path = u;
e884315e
LP
1273 else
1274 free(u);
35d2e7ec
LP
1275
1276 return 0;
8c6db833 1277}
c6c18be3 1278
7027ff61 1279int cg_mangle_path(const char *path, char **result) {
78edb35a
LP
1280 _cleanup_free_ char *c = NULL, *p = NULL;
1281 char *t;
35d2e7ec
LP
1282 int r;
1283
1284 assert(path);
1285 assert(result);
1286
73e231ab 1287 /* First, check if it already is a filesystem path */
7027ff61 1288 if (path_startswith(path, "/sys/fs/cgroup")) {
35d2e7ec 1289
b69d29ce
LP
1290 t = strdup(path);
1291 if (!t)
35d2e7ec
LP
1292 return -ENOMEM;
1293
dbb9401d 1294 *result = path_kill_slashes(t);
35d2e7ec
LP
1295 return 0;
1296 }
1297
73e231ab 1298 /* Otherwise, treat it as cg spec */
b69d29ce
LP
1299 r = cg_split_spec(path, &c, &p);
1300 if (r < 0)
35d2e7ec
LP
1301 return r;
1302
efdb0237 1303 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
35d2e7ec 1304}
1f73f0f1 1305
7027ff61 1306int cg_get_root_path(char **path) {
9444b1f2 1307 char *p, *e;
7027ff61
LP
1308 int r;
1309
1310 assert(path);
1311
9444b1f2 1312 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
7027ff61
LP
1313 if (r < 0)
1314 return r;
1315
efdb0237
LP
1316 e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1317 if (!e)
1318 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1319 if (!e)
1320 e = endswith(p, "/system"); /* even more legacy */
9444b1f2 1321 if (e)
7027ff61
LP
1322 *e = 0;
1323
1f73f0f1
LP
1324 *path = p;
1325 return 0;
1326}
b59e2465 1327
751bc6ac
LP
1328int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1329 _cleanup_free_ char *rt = NULL;
1330 char *p;
ba1261bc
LP
1331 int r;
1332
e9174f29 1333 assert(cgroup);
751bc6ac 1334 assert(shifted);
e9174f29
LP
1335
1336 if (!root) {
1337 /* If the root was specified let's use that, otherwise
1338 * let's determine it from PID 1 */
1339
751bc6ac 1340 r = cg_get_root_path(&rt);
e9174f29
LP
1341 if (r < 0)
1342 return r;
1343
751bc6ac 1344 root = rt;
e9174f29 1345 }
ba1261bc 1346
751bc6ac 1347 p = path_startswith(cgroup, root);
efdb0237 1348 if (p && p > cgroup)
751bc6ac
LP
1349 *shifted = p - 1;
1350 else
1351 *shifted = cgroup;
1352
1353 return 0;
1354}
1355
1356int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1357 _cleanup_free_ char *raw = NULL;
1358 const char *c;
1359 int r;
1360
1361 assert(pid >= 0);
1362 assert(cgroup);
1363
1364 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
7027ff61 1365 if (r < 0)
ba1261bc 1366 return r;
ba1261bc 1367
751bc6ac
LP
1368 r = cg_shift_path(raw, root, &c);
1369 if (r < 0)
1370 return r;
ba1261bc 1371
751bc6ac
LP
1372 if (c == raw) {
1373 *cgroup = raw;
1374 raw = NULL;
1375 } else {
1376 char *n;
ba1261bc 1377
751bc6ac
LP
1378 n = strdup(c);
1379 if (!n)
ba1261bc 1380 return -ENOMEM;
ba1261bc 1381
751bc6ac
LP
1382 *cgroup = n;
1383 }
ba1261bc
LP
1384
1385 return 0;
1386}
1387
9ed794a3 1388int cg_path_decode_unit(const char *cgroup, char **unit) {
8b0849e9
LP
1389 char *c, *s;
1390 size_t n;
ef1673d1
MT
1391
1392 assert(cgroup);
6c03089c 1393 assert(unit);
ef1673d1 1394
8b0849e9
LP
1395 n = strcspn(cgroup, "/");
1396 if (n < 3)
1397 return -ENXIO;
1398
1399 c = strndupa(cgroup, n);
ae018d9b 1400 c = cg_unescape(c);
ef1673d1 1401
7410616c 1402 if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
cfeaa44a 1403 return -ENXIO;
ef1673d1 1404
d7bd3de0 1405 s = strdup(c);
6c03089c
LP
1406 if (!s)
1407 return -ENOMEM;
1408
1409 *unit = s;
ef1673d1
MT
1410 return 0;
1411}
1412
8b0849e9
LP
1413static bool valid_slice_name(const char *p, size_t n) {
1414
1415 if (!p)
1416 return false;
1417
1418 if (n < strlen("x.slice"))
1419 return false;
1420
1421 if (memcmp(p + n - 6, ".slice", 6) == 0) {
1422 char buf[n+1], *c;
1423
1424 memcpy(buf, p, n);
1425 buf[n] = 0;
1426
1427 c = cg_unescape(buf);
1428
7410616c 1429 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
8b0849e9
LP
1430 }
1431
1432 return false;
1433}
1434
/* Skips over all leading slice components of a cgroup path. Returns a pointer into 'p' to the
 * first component that is not a valid slice name (leading slashes already skipped). */
static const char *skip_slices(const char *p) {
        size_t n;

        assert(p);

        p += strspn(p, "/");

        while (valid_slice_name(p, (n = strcspn(p, "/")))) {
                /* Jump over this slice and over the slashes following it */
                p += n;
                p += strspn(p, "/");
        }

        return p;
}
1452
8b0849e9 1453int cg_path_get_unit(const char *path, char **ret) {
6c03089c 1454 const char *e;
8b0849e9
LP
1455 char *unit;
1456 int r;
6c03089c
LP
1457
1458 assert(path);
8b0849e9 1459 assert(ret);
6c03089c 1460
9444b1f2 1461 e = skip_slices(path);
6c03089c 1462
8b0849e9
LP
1463 r = cg_path_decode_unit(e, &unit);
1464 if (r < 0)
1465 return r;
1466
1467 /* We skipped over the slices, don't accept any now */
1468 if (endswith(unit, ".slice")) {
1469 free(unit);
1470 return -ENXIO;
1471 }
1472
1473 *ret = unit;
1474 return 0;
6c03089c
LP
1475}
1476
1477int cg_pid_get_unit(pid_t pid, char **unit) {
7fd1b19b 1478 _cleanup_free_ char *cgroup = NULL;
ba1261bc 1479 int r;
ba1261bc 1480
ef1673d1
MT
1481 assert(unit);
1482
7027ff61 1483 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
ef1673d1
MT
1484 if (r < 0)
1485 return r;
1486
6c03089c
LP
1487 return cg_path_get_unit(cgroup, unit);
1488}
ef1673d1 1489
d4fffc4b
ZJS
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer into 'p' to the component following the session scope (with the slashes
 * in between skipped), or NULL if 'p' is empty or does not start with a valid session scope.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Holds just the session ID, i.e. the part between prefix and suffix */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure with NULL, not "false" */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1526
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer into 'p' to the component following the user manager service (with the
 * slashes in between skipped), or NULL if 'p' is empty or does not start with a
 * "user@<uid>.service" component carrying a parsable UID.
 */
static const char *skip_user_manager(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0)
                return NULL;
        if (memcmp(p + n - 8, ".service", 8) != 0)
                return NULL;

        {
                /* Holds just the instance part, i.e. what is between "user@" and ".service" */
                char uid_str[n - 5 - 8 + 1];

                memcpy(uid_str, p + 5, n - 5 - 8);
                uid_str[n - 5 - 8] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;
        }

        p += n;
        p += strspn(p, "/");

        return p;
}
1564
/* Skips everything in a cgroup path that precedes the units of a user: first any slices, then
 * either the user manager service or, failing that, the session scope. Returns a pointer into
 * 'path' behind that prefix, or NULL if neither a user manager nor a session is found. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Skip the user manager, if it's in the path now... */
        after_manager = skip_user_manager(after_slices);

        /* ...alternatively skip the user session if it is in the path */
        return after_manager ? after_manager : skip_session(after_slices);
}
32081481 1581
329ac4bc
LP
/* Extracts the user unit name from a cgroup path. The newly allocated name is stored in *ret.
 * Returns 0 on success, -ENXIO if the path does not refer to a user unit, other negative
 * errno-style values on failure. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);
        if (!rest)
                return -ENXIO;

        /* Behind the user prefix the layout is the same as for a system unit, hence the same
         * parser can be used from here on. */
        return cg_path_get_unit(rest, ret);
}
ba1261bc 1597
ef1673d1 1598int cg_pid_get_user_unit(pid_t pid, char **unit) {
7fd1b19b 1599 _cleanup_free_ char *cgroup = NULL;
6c03089c
LP
1600 int r;
1601
1602 assert(unit);
1603
7027ff61 1604 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
6c03089c
LP
1605 if (r < 0)
1606 return r;
1607
1608 return cg_path_get_user_unit(cgroup, unit);
ba1261bc 1609}
e884315e 1610
7027ff61 1611int cg_path_get_machine_name(const char *path, char **machine) {
efdb0237
LP
1612 _cleanup_free_ char *u = NULL;
1613 const char *sl;
89f7c846 1614 int r;
374ec6ab 1615
89f7c846
LP
1616 r = cg_path_get_unit(path, &u);
1617 if (r < 0)
1618 return r;
7027ff61 1619
efdb0237 1620 sl = strjoina("/run/systemd/machines/unit:", u);
89f7c846 1621 return readlink_malloc(sl, machine);
7027ff61
LP
1622}
1623
1624int cg_pid_get_machine_name(pid_t pid, char **machine) {
7fd1b19b 1625 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1626 int r;
1627
1628 assert(machine);
1629
1630 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1631 if (r < 0)
1632 return r;
1633
1634 return cg_path_get_machine_name(cgroup, machine);
1635}
1636
1637int cg_path_get_session(const char *path, char **session) {
8b0849e9
LP
1638 _cleanup_free_ char *unit = NULL;
1639 char *start, *end;
1640 int r;
7027ff61
LP
1641
1642 assert(path);
7027ff61 1643
8b0849e9
LP
1644 r = cg_path_get_unit(path, &unit);
1645 if (r < 0)
1646 return r;
7027ff61 1647
8b0849e9
LP
1648 start = startswith(unit, "session-");
1649 if (!start)
cfeaa44a 1650 return -ENXIO;
8b0849e9
LP
1651 end = endswith(start, ".scope");
1652 if (!end)
cfeaa44a 1653 return -ENXIO;
8b0849e9
LP
1654
1655 *end = 0;
1656 if (!session_id_valid(start))
cfeaa44a 1657 return -ENXIO;
374ec6ab 1658
af08d2f9 1659 if (session) {
8b0849e9 1660 char *rr;
af08d2f9 1661
8b0849e9
LP
1662 rr = strdup(start);
1663 if (!rr)
af08d2f9
LP
1664 return -ENOMEM;
1665
8b0849e9 1666 *session = rr;
af08d2f9 1667 }
7027ff61 1668
7027ff61
LP
1669 return 0;
1670}
1671
1672int cg_pid_get_session(pid_t pid, char **session) {
7fd1b19b 1673 _cleanup_free_ char *cgroup = NULL;
7027ff61
LP
1674 int r;
1675
7027ff61
LP
1676 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1677 if (r < 0)
1678 return r;
1679
1680 return cg_path_get_session(cgroup, session);
1681}
1682
ae018d9b 1683int cg_path_get_owner_uid(const char *path, uid_t *uid) {
374ec6ab 1684 _cleanup_free_ char *slice = NULL;
8b0849e9 1685 char *start, *end;
374ec6ab 1686 int r;
ae018d9b
LP
1687
1688 assert(path);
ae018d9b 1689
374ec6ab
LP
1690 r = cg_path_get_slice(path, &slice);
1691 if (r < 0)
1692 return r;
ae018d9b 1693
674eb685
LP
1694 start = startswith(slice, "user-");
1695 if (!start)
cfeaa44a 1696 return -ENXIO;
8b0849e9 1697 end = endswith(start, ".slice");
674eb685 1698 if (!end)
cfeaa44a 1699 return -ENXIO;
ae018d9b 1700
8b0849e9
LP
1701 *end = 0;
1702 if (parse_uid(start, uid) < 0)
cfeaa44a 1703 return -ENXIO;
674eb685 1704
674eb685 1705 return 0;
ae018d9b
LP
1706}
1707
1708int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1709 _cleanup_free_ char *cgroup = NULL;
1710 int r;
1711
ae018d9b
LP
1712 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1713 if (r < 0)
1714 return r;
1715
1716 return cg_path_get_owner_uid(cgroup, uid);
1717}
1718
1021b21b
LP
1719int cg_path_get_slice(const char *p, char **slice) {
1720 const char *e = NULL;
1021b21b
LP
1721
1722 assert(p);
1723 assert(slice);
1724
329ac4bc
LP
1725 /* Finds the right-most slice unit from the beginning, but
1726 * stops before we come to the first non-slice unit. */
1727
1021b21b
LP
1728 for (;;) {
1729 size_t n;
1730
1731 p += strspn(p, "/");
1732
1733 n = strcspn(p, "/");
8b0849e9 1734 if (!valid_slice_name(p, n)) {
1021b21b 1735
8b0849e9
LP
1736 if (!e) {
1737 char *s;
1021b21b 1738
e5d855d3 1739 s = strdup(SPECIAL_ROOT_SLICE);
8b0849e9
LP
1740 if (!s)
1741 return -ENOMEM;
1021b21b 1742
8b0849e9
LP
1743 *slice = s;
1744 return 0;
1745 }
1746
1747 return cg_path_decode_unit(e, slice);
1021b21b
LP
1748 }
1749
1750 e = p;
1021b21b
LP
1751 p += n;
1752 }
1753}
1754
1755int cg_pid_get_slice(pid_t pid, char **slice) {
1756 _cleanup_free_ char *cgroup = NULL;
1757 int r;
1758
1759 assert(slice);
1760
1761 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1762 if (r < 0)
1763 return r;
1764
1765 return cg_path_get_slice(cgroup, slice);
1766}
1767
329ac4bc
LP
/* Determines the slice within a user's own unit tree that a cgroup path refers to. The newly
 * allocated name is stored in *slice. Returns 0 on success, -ENXIO if the path carries no
 * user manager/session prefix, other negative errno-style values on failure. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);
        if (!rest)
                return -ENXIO;

        /* Behind the user prefix the layout is the same as for a system slice, hence the same
         * parser can be used from here on. */
        return cg_path_get_slice(rest, slice);
}
1781
1782int cg_pid_get_user_slice(pid_t pid, char **slice) {
1783 _cleanup_free_ char *cgroup = NULL;
1784 int r;
1785
1786 assert(slice);
1787
1788 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1789 if (r < 0)
1790 return r;
1791
1792 return cg_path_get_user_slice(cgroup, slice);
1793}
1794
ae018d9b
LP
1795char *cg_escape(const char *p) {
1796 bool need_prefix = false;
1797
1798 /* This implements very minimal escaping for names to be used
1799 * as file names in the cgroup tree: any name which might
1800 * conflict with a kernel name or is prefixed with '_' is
1801 * prefixed with a '_'. That way, when reading cgroup names it
1802 * is sufficient to remove a single prefixing underscore if
1803 * there is one. */
1804
1805 /* The return value of this function (unlike cg_unescape())
1806 * needs free()! */
1807
a0ab5665
LP
1808 if (p[0] == 0 ||
1809 p[0] == '_' ||
1810 p[0] == '.' ||
1811 streq(p, "notify_on_release") ||
1812 streq(p, "release_agent") ||
efdb0237
LP
1813 streq(p, "tasks") ||
1814 startswith(p, "cgroup."))
ae018d9b
LP
1815 need_prefix = true;
1816 else {
1817 const char *dot;
1818
1819 dot = strrchr(p, '.');
1820 if (dot) {
efdb0237
LP
1821 CGroupController c;
1822 size_t l = dot - p;
ae018d9b 1823
efdb0237
LP
1824 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1825 const char *n;
1826
1827 n = cgroup_controller_to_string(c);
ae018d9b 1828
efdb0237
LP
1829 if (l != strlen(n))
1830 continue;
ae018d9b 1831
efdb0237
LP
1832 if (memcmp(p, n, l) != 0)
1833 continue;
1834
1835 need_prefix = true;
1836 break;
ae018d9b
LP
1837 }
1838 }
1839 }
1840
1841 if (need_prefix)
1842 return strappend("_", p);
efdb0237
LP
1843
1844 return strdup(p);
ae018d9b
LP
1845}
1846
/* Reverses cg_escape(): skips a single leading underscore, if there is one.
 *
 * The return value of this function (unlike cg_escape()) doesn't need free()! It points into
 * the string passed in. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
78edb35a
LP
1858
1859#define CONTROLLER_VALID \
4b549144 1860 DIGITS LETTERS \
78edb35a
LP
1861 "_"
1862
185a0874 1863bool cg_controller_is_valid(const char *p) {
78edb35a
LP
1864 const char *t, *s;
1865
1866 if (!p)
1867 return false;
1868
b6629c4b
TH
1869 if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1870 return true;
1871
185a0874
DJL
1872 s = startswith(p, "name=");
1873 if (s)
1874 p = s;
78edb35a
LP
1875
1876 if (*p == 0 || *p == '_')
1877 return false;
1878
1879 for (t = p; *t; t++)
1880 if (!strchr(CONTROLLER_VALID, *t))
1881 return false;
1882
1883 if (t - p > FILENAME_MAX)
1884 return false;
1885
1886 return true;
1887}
a016b922
LP
1888
1889int cg_slice_to_path(const char *unit, char **ret) {
1890 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1891 const char *dash;
7410616c 1892 int r;
a016b922
LP
1893
1894 assert(unit);
1895 assert(ret);
1896
e5d855d3 1897 if (streq(unit, SPECIAL_ROOT_SLICE)) {
c96cc582
LP
1898 char *x;
1899
1900 x = strdup("");
1901 if (!x)
1902 return -ENOMEM;
1903 *ret = x;
1904 return 0;
1905 }
1906
7410616c 1907 if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
a016b922
LP
1908 return -EINVAL;
1909
1910 if (!endswith(unit, ".slice"))
1911 return -EINVAL;
1912
7410616c
LP
1913 r = unit_name_to_prefix(unit, &p);
1914 if (r < 0)
1915 return r;
a016b922
LP
1916
1917 dash = strchr(p, '-');
e66e5b61
LP
1918
1919 /* Don't allow initial dashes */
1920 if (dash == p)
1921 return -EINVAL;
1922
a016b922
LP
1923 while (dash) {
1924 _cleanup_free_ char *escaped = NULL;
1925 char n[dash - p + sizeof(".slice")];
1926
e66e5b61
LP
1927 /* Don't allow trailing or double dashes */
1928 if (dash[1] == 0 || dash[1] == '-')
c96cc582 1929 return -EINVAL;
a016b922 1930
c96cc582 1931 strcpy(stpncpy(n, p, dash - p), ".slice");
7410616c 1932 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
a016b922
LP
1933 return -EINVAL;
1934
1935 escaped = cg_escape(n);
1936 if (!escaped)
1937 return -ENOMEM;
1938
1939 if (!strextend(&s, escaped, "/", NULL))
1940 return -ENOMEM;
1941
1942 dash = strchr(dash+1, '-');
1943 }
1944
1945 e = cg_escape(unit);
1946 if (!e)
1947 return -ENOMEM;
1948
1949 if (!strextend(&s, e, NULL))
1950 return -ENOMEM;
1951
1952 *ret = s;
1953 s = NULL;
1954
1955 return 0;
1956}
4ad49000
LP
1957
1958int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1959 _cleanup_free_ char *p = NULL;
1960 int r;
1961
1962 r = cg_get_path(controller, path, attribute, &p);
1963 if (r < 0)
1964 return r;
1965
4c1fc3e4 1966 return write_string_file(p, value, 0);
4ad49000
LP
1967}
1968
934277fe
LP
1969int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1970 _cleanup_free_ char *p = NULL;
1971 int r;
1972
1973 r = cg_get_path(controller, path, attribute, &p);
1974 if (r < 0)
1975 return r;
1976
1977 return read_one_line_file(p, ret);
1978}
1979
66ebf6c0
TH
1980int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
1981 _cleanup_free_ char *filename = NULL, *content = NULL;
1982 char *line, *p;
1983 int i, r;
1984
1985 for (i = 0; keys[i]; i++)
1986 values[i] = NULL;
1987
1988 r = cg_get_path(controller, path, attribute, &filename);
1989 if (r < 0)
1990 return r;
1991
1992 r = read_full_file(filename, &content, NULL);
1993 if (r < 0)
1994 return r;
1995
1996 p = content;
1997 while ((line = strsep(&p, "\n"))) {
1998 char *key;
1999
2000 key = strsep(&line, " ");
2001
2002 for (i = 0; keys[i]; i++) {
2003 if (streq(key, keys[i])) {
2004 values[i] = strdup(line);
2005 break;
2006 }
2007 }
2008 }
2009
2010 for (i = 0; keys[i]; i++) {
2011 if (!values[i]) {
2012 for (i = 0; keys[i]; i++) {
2013 free(values[i]);
2014 values[i] = NULL;
2015 }
2016 return -ENOENT;
2017 }
2018 }
2019
2020 return 0;
2021}
2022
efdb0237
LP
2023int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2024 CGroupController c;
415fc41c 2025 int r;
4ad49000
LP
2026
2027 /* This one will create a cgroup in our private tree, but also
2028 * duplicate it in the trees specified in mask, and remove it
2029 * in all others */
2030
2031 /* First create the cgroup in our own hierarchy. */
2032 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2033 if (r < 0)
2034 return r;
2035
efdb0237 2036 /* If we are in the unified hierarchy, we are done now */
415fc41c 2037 if (cg_all_unified())
efdb0237
LP
2038 return 0;
2039
2040 /* Otherwise, do the same in the other hierarchies */
2041 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2042 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2043 const char *n;
2044
2045 n = cgroup_controller_to_string(c);
2046
13b84ec7 2047 if (mask & bit)
efdb0237 2048 (void) cg_create(n, path);
13b84ec7 2049 else if (supported & bit)
efdb0237 2050 (void) cg_trim(n, path, true);
4ad49000
LP
2051 }
2052
13b84ec7 2053 return 0;
4ad49000
LP
2054}
2055
efdb0237
LP
2056int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2057 CGroupController c;
415fc41c 2058 int r;
4ad49000
LP
2059
2060 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
13b84ec7
LP
2061 if (r < 0)
2062 return r;
4ad49000 2063
415fc41c 2064 if (cg_all_unified())
efdb0237 2065 return 0;
7b3fd631 2066
efdb0237
LP
2067 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2068 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2069 const char *p = NULL;
7b3fd631 2070
efdb0237
LP
2071 if (!(supported & bit))
2072 continue;
7b3fd631 2073
efdb0237
LP
2074 if (path_callback)
2075 p = path_callback(bit, userdata);
7b3fd631 2076
efdb0237
LP
2077 if (!p)
2078 p = path;
4ad49000 2079
efdb0237 2080 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
4ad49000
LP
2081 }
2082
13b84ec7 2083 return 0;
4ad49000
LP
2084}
2085
efdb0237 2086int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
6c12b52e
LP
2087 Iterator i;
2088 void *pidp;
2089 int r = 0;
2090
2091 SET_FOREACH(pidp, pids, i) {
fea72cc0 2092 pid_t pid = PTR_TO_PID(pidp);
13b84ec7 2093 int q;
6c12b52e 2094
7b3fd631 2095 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
efdb0237 2096 if (q < 0 && r >= 0)
13b84ec7 2097 r = q;
6c12b52e
LP
2098 }
2099
2100 return r;
2101}
2102
efdb0237 2103int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
b3c5bad3 2104 CGroupController c;
415fc41c 2105 int r = 0;
4ad49000 2106
13b84ec7 2107 if (!path_equal(from, to)) {
1d98fef1 2108 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
13b84ec7
LP
2109 if (r < 0)
2110 return r;
2111 }
4ad49000 2112
415fc41c 2113 if (cg_all_unified())
efdb0237 2114 return r;
03b90d4b 2115
efdb0237
LP
2116 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2117 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2118 const char *p = NULL;
03b90d4b 2119
efdb0237
LP
2120 if (!(supported & bit))
2121 continue;
03b90d4b 2122
efdb0237
LP
2123 if (to_callback)
2124 p = to_callback(bit, userdata);
4ad49000 2125
efdb0237
LP
2126 if (!p)
2127 p = to;
2128
1d98fef1 2129 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
4ad49000
LP
2130 }
2131
13b84ec7 2132 return 0;
4ad49000
LP
2133}
2134
efdb0237
LP
2135int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2136 CGroupController c;
415fc41c 2137 int r;
4ad49000
LP
2138
2139 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2140 if (r < 0)
2141 return r;
2142
415fc41c 2143 if (cg_all_unified())
efdb0237
LP
2144 return r;
2145
2146 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2147 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2148
2149 if (!(supported & bit))
2150 continue;
4ad49000 2151
efdb0237 2152 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
4ad49000
LP
2153 }
2154
13b84ec7 2155 return 0;
4ad49000
LP
2156}
2157
efdb0237
LP
2158int cg_mask_supported(CGroupMask *ret) {
2159 CGroupMask mask = 0;
415fc41c 2160 int r;
efdb0237
LP
2161
2162 /* Determines the mask of supported cgroup controllers. Only
2163 * includes controllers we can make sense of and that are
2164 * actually accessible. */
4ad49000 2165
415fc41c 2166 if (cg_all_unified()) {
5f4c5fef 2167 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
efdb0237
LP
2168 const char *c;
2169
2170 /* In the unified hierarchy we can read the supported
2171 * and accessible controllers from a the top-level
2172 * cgroup attribute */
2173
5f4c5fef
LP
2174 r = cg_get_root_path(&root);
2175 if (r < 0)
2176 return r;
2177
2178 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2179 if (r < 0)
2180 return r;
2181
2182 r = read_one_line_file(path, &controllers);
efdb0237
LP
2183 if (r < 0)
2184 return r;
4ad49000 2185
efdb0237
LP
2186 c = controllers;
2187 for (;;) {
2188 _cleanup_free_ char *n = NULL;
2189 CGroupController v;
2190
2191 r = extract_first_word(&c, &n, NULL, 0);
2192 if (r < 0)
2193 return r;
2194 if (r == 0)
2195 break;
2196
2197 v = cgroup_controller_from_string(n);
2198 if (v < 0)
2199 continue;
2200
2201 mask |= CGROUP_CONTROLLER_TO_MASK(v);
2202 }
2203
66ebf6c0 2204 /* Currently, we support the cpu, memory, io and pids
03a7b521
LP
2205 * controller in the unified hierarchy, mask
2206 * everything else off. */
66ebf6c0 2207 mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
efdb0237
LP
2208
2209 } else {
2210 CGroupController c;
2211
2212 /* In the legacy hierarchy, we check whether which
2213 * hierarchies are mounted. */
2214
2215 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2216 const char *n;
2217
2218 n = cgroup_controller_to_string(c);
2219 if (controller_is_accessible(n) >= 0)
2220 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2221 }
4ad49000
LP
2222 }
2223
efdb0237
LP
2224 *ret = mask;
2225 return 0;
4ad49000 2226}
b12afc8c
LP
2227
2228int cg_kernel_controllers(Set *controllers) {
2229 _cleanup_fclose_ FILE *f = NULL;
2230 char buf[LINE_MAX];
2231 int r;
2232
2233 assert(controllers);
2234
e155a0aa
LP
2235 /* Determines the full list of kernel-known controllers. Might
2236 * include controllers we don't actually support, arbitrary
2237 * named hierarchies and controllers that aren't currently
2238 * accessible (because not mounted). */
2239
b12afc8c
LP
2240 f = fopen("/proc/cgroups", "re");
2241 if (!f) {
2242 if (errno == ENOENT)
2243 return 0;
2244 return -errno;
2245 }
2246
2247 /* Ignore the header line */
2248 (void) fgets(buf, sizeof(buf), f);
2249
2250 for (;;) {
2251 char *controller;
2252 int enabled = 0;
2253
2254 errno = 0;
2255 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2256
2257 if (feof(f))
2258 break;
2259
b3267152 2260 if (ferror(f) && errno > 0)
b12afc8c
LP
2261 return -errno;
2262
2263 return -EBADMSG;
2264 }
2265
2266 if (!enabled) {
2267 free(controller);
2268 continue;
2269 }
2270
efdb0237 2271 if (!cg_controller_is_valid(controller)) {
b12afc8c
LP
2272 free(controller);
2273 return -EBADMSG;
2274 }
2275
2276 r = set_consume(controllers, controller);
2277 if (r < 0)
2278 return r;
2279 }
2280
2281 return 0;
2282}
efdb0237 2283
5da38d07
TH
2284static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2285
f08e9287
TH
2286/* The hybrid mode was initially implemented in v232 and simply mounted
2287 * cgroup v2 on /sys/fs/cgroup/systemd. This unfortunately broke other
2288 * tools (such as docker) which expected the v1 "name=systemd" hierarchy
2289 * on /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts
2290 * v2 on /sys/fs/cgroup/unified and maintains "name=systemd" hierarchy
2291 * on /sys/fs/cgroup/systemd for compatibility with other tools.
2292 *
2293 * To keep live upgrade working, we detect and support v232 layout. When
2294 * v232 layout is detected, to keep cgroup v2 process management but
2295 * disable the compat dual layout, we return %true on
2296 * cg_unified(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
2297 */
2298static thread_local bool unified_systemd_v232;
2299
5da38d07 2300static int cg_update_unified(void) {
efdb0237 2301
efdb0237
LP
2302 struct statfs fs;
2303
2304 /* Checks if we support the unified hierarchy. Returns an
2305 * error when the cgroup hierarchies aren't mounted yet or we
2306 * have any other trouble determining if the unified hierarchy
2307 * is supported. */
2308
5da38d07
TH
2309 if (unified_cache >= CGROUP_UNIFIED_NONE)
2310 return 0;
efdb0237
LP
2311
2312 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2313 return -errno;
2314
09961995 2315 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC))
5da38d07
TH
2316 unified_cache = CGROUP_UNIFIED_ALL;
2317 else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2977724b 2318 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
f08e9287 2319 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2977724b 2320 unified_cache = CGROUP_UNIFIED_SYSTEMD;
f08e9287
TH
2321 unified_systemd_v232 = false;
2322 } else if (statfs("/sys/fs/cgroup/systemd/", &fs) == 0 &&
2323 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2324 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2325 unified_systemd_v232 = true;
2326 } else {
2977724b
TH
2327 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
2328 return -errno;
2329 if (!F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2330 return -ENOMEDIUM;
2331 unified_cache = CGROUP_UNIFIED_NONE;
2332 }
5da38d07 2333 } else
8b3aa503 2334 return -ENOMEDIUM;
efdb0237 2335
5da38d07
TH
2336 return 0;
2337}
2338
415fc41c 2339bool cg_unified(const char *controller) {
5da38d07 2340
415fc41c 2341 assert(cg_update_unified() >= 0);
5da38d07 2342
fc9ae717
LP
2343 if (unified_cache == CGROUP_UNIFIED_NONE)
2344 return false;
2345
2346 if (unified_cache >= CGROUP_UNIFIED_ALL)
2347 return true;
2348
2349 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
5da38d07
TH
2350}
2351
/* Returns whether all controllers are on the unified hierarchy, i.e. what cg_unified()
 * reports when no specific controller is named. */
bool cg_all_unified(void) {
        const char *any_controller = NULL;

        return cg_unified(any_controller);
}
2356
2977724b
TH
2357bool cg_hybrid_unified(void) {
2358
2359 assert(cg_update_unified() >= 0);
2360
f08e9287 2361 return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2977724b
TH
2362}
2363
415fc41c 2364int cg_unified_flush(void) {
5da38d07 2365 unified_cache = CGROUP_UNIFIED_UNKNOWN;
415fc41c
TH
2366
2367 return cg_update_unified();
efdb0237
LP
2368}
2369
2370int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2371 _cleanup_free_ char *fs = NULL;
2372 CGroupController c;
415fc41c 2373 int r;
efdb0237
LP
2374
2375 assert(p);
2376
2377 if (supported == 0)
2378 return 0;
2379
415fc41c 2380 if (!cg_all_unified()) /* on the legacy hiearchy there's no joining of controllers defined */
efdb0237
LP
2381 return 0;
2382
2383 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2384 if (r < 0)
2385 return r;
2386
2387 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2388 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2389 const char *n;
2390
2391 if (!(supported & bit))
2392 continue;
2393
2394 n = cgroup_controller_to_string(c);
2395 {
2396 char s[1 + strlen(n) + 1];
2397
2398 s[0] = mask & bit ? '+' : '-';
2399 strcpy(s + 1, n);
2400
2401 r = write_string_file(fs, s, 0);
2402 if (r < 0)
98e4d8d7 2403 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
efdb0237
LP
2404 }
2405 }
2406
2407 return 0;
2408}
2409
2410bool cg_is_unified_wanted(void) {
2411 static thread_local int wanted = -1;
415fc41c 2412 int r;
1d84ad94 2413 bool b;
77fab2a9 2414 const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
efdb0237 2415
77fab2a9 2416 /* If we have a cached value, return that. */
efdb0237
LP
2417 if (wanted >= 0)
2418 return wanted;
2419
239a3d09
ZJS
2420 /* If the hierarchy is already mounted, then follow whatever
2421 * was chosen for it. */
2422 if (cg_unified_flush() >= 0)
2423 return (wanted = cg_all_unified());
2424
77fab2a9
ZJS
2425 /* Otherwise, let's see what the kernel command line has to say.
2426 * Since checking is expensive, cache a non-error result. */
1d84ad94 2427 r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
efdb0237 2428
77fab2a9 2429 return (wanted = r > 0 ? b : is_default);
efdb0237
LP
2430}
2431
2432bool cg_is_legacy_wanted(void) {
239a3d09
ZJS
2433 static thread_local int wanted = -1;
2434
2435 /* If we have a cached value, return that. */
2436 if (wanted >= 0)
2437 return wanted;
2438
1b59cf04
ZJS
2439 /* Check if we have cgroups2 already mounted. */
2440 if (cg_unified_flush() >= 0 &&
2441 unified_cache == CGROUP_UNIFIED_ALL)
239a3d09 2442 return (wanted = false);
1b59cf04
ZJS
2443
2444 /* Otherwise, assume that at least partial legacy is wanted,
2445 * since cgroups2 should already be mounted at this point. */
239a3d09 2446 return (wanted = true);
efdb0237
LP
2447}
2448
a4464b95 2449bool cg_is_hybrid_wanted(void) {
5da38d07 2450 static thread_local int wanted = -1;
415fc41c 2451 int r;
1d84ad94 2452 bool b;
c19739db
ZJS
2453 const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2454 /* We default to true if the default is "hybrid", obviously,
2455 * but also when the default is "unified", because if we get
2456 * called, it means that unified hierarchy was not mounted. */
5da38d07 2457
77fab2a9 2458 /* If we have a cached value, return that. */
5da38d07
TH
2459 if (wanted >= 0)
2460 return wanted;
2461
239a3d09
ZJS
2462 /* If the hierarchy is already mounted, then follow whatever
2463 * was chosen for it. */
2464 if (cg_unified_flush() >= 0 &&
2465 unified_cache == CGROUP_UNIFIED_ALL)
2466 return (wanted = false);
2467
77fab2a9
ZJS
2468 /* Otherwise, let's see what the kernel command line has to say.
2469 * Since checking is expensive, cache a non-error result. */
1d84ad94 2470 r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
5da38d07 2471
2dcb526d
ZJS
2472 /* The meaning of the kernel option is reversed wrt. to the return value
2473 * of this function, hence the negation. */
77fab2a9 2474 return (wanted = r > 0 ? !b : is_default);
5da38d07
TH
2475}
2476
13c31542
TH
2477int cg_weight_parse(const char *s, uint64_t *ret) {
2478 uint64_t u;
2479 int r;
2480
2481 if (isempty(s)) {
2482 *ret = CGROUP_WEIGHT_INVALID;
2483 return 0;
2484 }
2485
2486 r = safe_atou64(s, &u);
2487 if (r < 0)
2488 return r;
2489
2490 if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2491 return -ERANGE;
2492
2493 *ret = u;
2494 return 0;
2495}
2496
9be57249
TH
2497const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2498 [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
2499 [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
ac06a0cf
TH
2500 [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
2501 [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
9be57249
TH
2502};
2503
2504static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2505 [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
2506 [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
ac06a0cf
TH
2507 [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
2508 [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
9be57249
TH
2509};
2510
2511DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2512
d53d9474
LP
2513int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2514 uint64_t u;
2515 int r;
2516
2517 if (isempty(s)) {
2518 *ret = CGROUP_CPU_SHARES_INVALID;
2519 return 0;
2520 }
2521
2522 r = safe_atou64(s, &u);
2523 if (r < 0)
2524 return r;
2525
2526 if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2527 return -ERANGE;
2528
2529 *ret = u;
2530 return 0;
2531}
2532
2533int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2534 uint64_t u;
2535 int r;
2536
2537 if (isempty(s)) {
2538 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2539 return 0;
2540 }
2541
2542 r = safe_atou64(s, &u);
2543 if (r < 0)
2544 return r;
2545
2546 if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2547 return -ERANGE;
2548
2549 *ret = u;
2550 return 0;
2551}
2552
f0bef277
EV
2553bool is_cgroup_fs(const struct statfs *s) {
2554 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2555 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2556}
2557
/* Returns true if the file descriptor 'fd' refers to something on a cgroup
 * (v1 or v2) file system, and false otherwise, including when fstatfs()
 * fails on the descriptor. */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        /* This function returns bool, so a negative errno cannot be passed to the
         * caller: any non-zero value would be converted to "true" and a failed
         * fstatfs() would be reported as a cgroup file system. */
        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2566
efdb0237
LP
2567static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2568 [CGROUP_CONTROLLER_CPU] = "cpu",
2569 [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
13c31542 2570 [CGROUP_CONTROLLER_IO] = "io",
efdb0237
LP
2571 [CGROUP_CONTROLLER_BLKIO] = "blkio",
2572 [CGROUP_CONTROLLER_MEMORY] = "memory",
3905f127 2573 [CGROUP_CONTROLLER_DEVICES] = "devices",
03a7b521 2574 [CGROUP_CONTROLLER_PIDS] = "pids",
efdb0237
LP
2575};
2576
2577DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);