/* src/basic/cgroup-util.c — systemd cgroup utility helpers */
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <limits.h>
5 #include <signal.h>
6 #include <stddef.h>
7 #include <stdlib.h>
8 #include <sys/types.h>
9 #include <sys/utsname.h>
10 #include <sys/xattr.h>
11 #include <unistd.h>
12
13 #include "alloc-util.h"
14 #include "cgroup-util.h"
15 #include "constants.h"
16 #include "dirent-util.h"
17 #include "extract-word.h"
18 #include "fd-util.h"
19 #include "fileio.h"
20 #include "format-util.h"
21 #include "fs-util.h"
22 #include "log.h"
23 #include "login-util.h"
24 #include "macro.h"
25 #include "missing_magic.h"
26 #include "missing_threads.h"
27 #include "mkdir.h"
28 #include "parse-util.h"
29 #include "path-util.h"
30 #include "process-util.h"
31 #include "set.h"
32 #include "special.h"
33 #include "stat-util.h"
34 #include "stdio-util.h"
35 #include "string-table.h"
36 #include "string-util.h"
37 #include "strv.h"
38 #include "unit-name.h"
39 #include "user-util.h"
40 #include "xattr-util.h"
41
42 static int cg_enumerate_items(const char *controller, const char *path, FILE **ret, const char *item) {
43 _cleanup_free_ char *fs = NULL;
44 FILE *f;
45 int r;
46
47 assert(ret);
48
49 r = cg_get_path(controller, path, item, &fs);
50 if (r < 0)
51 return r;
52
53 f = fopen(fs, "re");
54 if (!f)
55 return -errno;
56
57 *ret = f;
58 return 0;
59 }
60
int cg_enumerate_processes(const char *controller, const char *path, FILE **ret) {
        /* Opens the "cgroup.procs" file of the specified cgroup for enumerating its PIDs. */
        return cg_enumerate_items(controller, path, ret, "cgroup.procs");
}
64
65 int cg_read_pid(FILE *f, pid_t *ret) {
66 unsigned long ul;
67
68 /* Note that the cgroup.procs might contain duplicates! See cgroups.txt for details. */
69
70 assert(f);
71 assert(ret);
72
73 errno = 0;
74 if (fscanf(f, "%lu", &ul) != 1) {
75
76 if (feof(f)) {
77 *ret = 0;
78 return 0;
79 }
80
81 return errno_or_else(EIO);
82 }
83
84 if (ul <= 0)
85 return -EIO;
86 if (ul > PID_T_MAX)
87 return -EIO;
88
89 *ret = (pid_t) ul;
90 return 1;
91 }
92
/* Reads the next PID from 'f' and pins it into a PidRef. Returns > 0 when a process was read,
 * 0 on EOF (*ret is set to PIDREF_NULL then), negative on error. PIDs that vanish between reading
 * and pinning (ESRCH) are silently skipped. */
int cg_read_pidref(FILE *f, PidRef *ret) {
        int r;

        assert(f);
        assert(ret);

        for (;;) {
                pid_t pid;

                r = cg_read_pid(f, &pid);
                if (r < 0)
                        return r;
                if (r == 0) {
                        /* End of the procs file reached. */
                        *ret = PIDREF_NULL;
                        return 0;
                }

                r = pidref_set_pid(ret, pid);
                if (r >= 0)
                        return 1;
                if (r != -ESRCH)
                        return r;

                /* ESRCH → gone by now? just skip over it, read the next */
        }
}
119
/* Looks up the value of the key 'event' (e.g. "populated") in the cgroup's "cgroup.events"
 * pseudo-file and returns a copy of it in *ret. Returns -ENOENT if the key is not listed. */
int cg_read_event(
                const char *controller,
                const char *path,
                const char *event,
                char **ret) {

        _cleanup_free_ char *events = NULL, *content = NULL;
        int r;

        r = cg_get_path(controller, path, "cgroup.events", &events);
        if (r < 0)
                return r;

        r = read_full_virtual_file(events, &content, NULL);
        if (r < 0)
                return r;

        /* Each line of cgroup.events has the form "<key> <value>". */
        for (const char *p = content;;) {
                _cleanup_free_ char *line = NULL, *key = NULL, *val = NULL;
                const char *q;

                r = extract_first_word(&p, &line, "\n", 0);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -ENOENT; /* ran out of lines without finding the key */

                q = line;
                r = extract_first_word(&q, &key, " ", 0);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EINVAL; /* line without a key → malformed file */

                if (!streq(key, event))
                        continue;

                /* 'q' now points at the remainder of the line, i.e. the value. */
                val = strdup(q);
                if (!val)
                        return -ENOMEM;

                *ret = TAKE_PTR(val);
                return 0;
        }
}
165
/* Returns whether the kernel supports cgroup namespaces, judged by the existence of
 * /proc/self/ns/cgroup. The result is cached per thread. */
bool cg_ns_supported(void) {
        static thread_local int enabled = -1;

        if (enabled >= 0)
                return enabled;

        if (access("/proc/self/ns/cgroup", F_OK) < 0) {
                /* ENOENT simply means "not supported"; anything else is unexpected, log it. */
                if (errno != ENOENT)
                        log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
                enabled = false;
        } else
                enabled = true;

        return enabled;
}
181
182 bool cg_freezer_supported(void) {
183 static thread_local int supported = -1;
184
185 if (supported >= 0)
186 return supported;
187
188 supported = cg_all_unified() > 0 && access("/sys/fs/cgroup/init.scope/cgroup.freeze", F_OK) == 0;
189
190 return supported;
191 }
192
/* Returns whether the kernel's "cgroup.kill" attribute is available (cgroupsv2 only), judged by
 * probing the init scope. The result is cached per thread. */
bool cg_kill_supported(void) {
        static thread_local int supported = -1;

        if (supported >= 0)
                return supported;

        if (cg_all_unified() <= 0) /* cgroup.kill only exists on the unified hierarchy */
                supported = false;
        else if (access("/sys/fs/cgroup/init.scope/cgroup.kill", F_OK) < 0) {
                /* ENOENT means the kernel is too old; anything else is unexpected, log it. */
                if (errno != ENOENT)
                        log_debug_errno(errno, "Failed to check if cgroup.kill is available, assuming not: %m");
                supported = false;
        } else
                supported = true;

        return supported;
}
210
211 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **ret) {
212 _cleanup_free_ char *fs = NULL;
213 DIR *d;
214 int r;
215
216 assert(ret);
217
218 /* This is not recursive! */
219
220 r = cg_get_path(controller, path, NULL, &fs);
221 if (r < 0)
222 return r;
223
224 d = opendir(fs);
225 if (!d)
226 return -errno;
227
228 *ret = d;
229 return 0;
230 }
231
232 int cg_read_subgroup(DIR *d, char **ret) {
233 assert(d);
234 assert(ret);
235
236 FOREACH_DIRENT_ALL(de, d, return -errno) {
237 char *b;
238
239 if (de->d_type != DT_DIR)
240 continue;
241
242 if (dot_or_dot_dot(de->d_name))
243 continue;
244
245 b = strdup(de->d_name);
246 if (!b)
247 return -ENOMEM;
248
249 *ret = b;
250 return 1;
251 }
252
253 *ret = NULL;
254 return 0;
255 }
256
/* Removes the specified (empty) cgroup directory. A missing directory (ENOENT) is not an error.
 * On a hybrid cgroup setup the matching directory in the legacy compat hierarchy is removed too;
 * failure there is only logged, not propagated. */
int cg_rmdir(const char *controller, const char *path) {
        _cleanup_free_ char *p = NULL;
        int r;

        r = cg_get_path(controller, path, NULL, &p);
        if (r < 0)
                return r;

        r = rmdir(p);
        if (r < 0 && errno != ENOENT)
                return -errno;

        r = cg_hybrid_unified();
        if (r <= 0) /* not hybrid (or error determining it) → nothing further to do */
                return r;

        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                /* Mirror the removal into the legacy name=systemd hierarchy. */
                r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
                if (r < 0)
                        log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
        }

        return 0;
}
281
/* Sends 'sig' to every task listed in the cgroup's 'item' file ("cgroup.procs" or
 * "cgroup.threads"). PIDs recorded in 's' are skipped, and newly signalled PIDs are added to it,
 * so the signal is delivered at most once per task even across repeated scans. Returns > 0 if at
 * least one task was signalled (or the log_kill callback's value), 0 if none, < 0 on error. */
static int cg_kill_items(
                const char *path,
                int sig,
                CGroupFlags flags,
                Set *s,
                cg_kill_log_func_t log_kill,
                void *userdata,
                const char *item) {

        _cleanup_set_free_ Set *allocated_set = NULL;
        bool done = false;
        int r, ret = 0, ret_log_kill = 0;

        assert(sig >= 0);

        /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
         * SIGCONT on SIGKILL. */
        if (IN_SET(sig, SIGCONT, SIGKILL))
                flags &= ~CGROUP_SIGCONT;

        /* This goes through the tasks list and kills them all. This
         * is repeated until no further processes are added to the
         * tasks list, to properly handle forking processes */

        if (!s) {
                /* No tracking set supplied by the caller → use a private one for this call. */
                s = allocated_set = set_new(NULL);
                if (!s)
                        return -ENOMEM;
        }

        do {
                _cleanup_fclose_ FILE *f = NULL;
                done = true;

                r = cg_enumerate_items(SYSTEMD_CGROUP_CONTROLLER, path, &f, item);
                if (r == -ENOENT)
                        break; /* the cgroup is gone → nothing left to kill */
                if (r < 0)
                        return RET_GATHER(ret, r);

                for (;;) {
                        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;

                        r = cg_read_pidref(f, &pidref);
                        if (r < 0)
                                return RET_GATHER(ret, r);
                        if (r == 0)
                                break;

                        if ((flags & CGROUP_IGNORE_SELF) && pidref_is_self(&pidref))
                                continue;

                        /* Skip PIDs we already signalled in an earlier pass. */
                        if (set_get(s, PID_TO_PTR(pidref.pid)) == PID_TO_PTR(pidref.pid))
                                continue;

                        if (log_kill)
                                ret_log_kill = log_kill(&pidref, sig, userdata);

                        /* If we haven't killed this process yet, kill it */
                        r = pidref_kill(&pidref, sig);
                        if (r < 0 && r != -ESRCH)
                                RET_GATHER(ret, r);
                        if (r >= 0) {
                                if (flags & CGROUP_SIGCONT)
                                        (void) pidref_kill(&pidref, SIGCONT);

                                /* Remember that we signalled something (or the callback's result). */
                                if (ret == 0) {
                                        if (log_kill)
                                                ret = ret_log_kill;
                                        else
                                                ret = 1;
                                }
                        }

                        done = false;

                        r = set_put(s, PID_TO_PTR(pidref.pid));
                        if (r < 0)
                                return RET_GATHER(ret, r);
                }

                /* To avoid racing against processes which fork quicker than we can kill them, we repeat this
                 * until no new pids need to be killed. */

        } while (!done);

        return ret;
}
370
/* Sends 'sig' to all processes in the cgroup 'path' (non-recursively). For SIGKILL on the unified
 * hierarchy, remaining threads are additionally signalled individually as a kernel-bug
 * workaround. Returns > 0 if something was killed, 0 if nothing was, < 0 on error. */
int cg_kill(
                const char *path,
                int sig,
                CGroupFlags flags,
                Set *s,
                cg_kill_log_func_t log_kill,
                void *userdata) {

        int r, ret;

        r = cg_kill_items(path, sig, flags, s, log_kill, userdata, "cgroup.procs");
        if (r < 0 || sig != SIGKILL)
                return r;

        ret = r;

        /* Only in case of killing with SIGKILL and when using cgroupsv2, kill remaining threads manually as
           a workaround for kernel bug. It was fixed in 5.2-rc5 (c03cd7738a83), backported to 4.19.66
           (4340d175b898) and 4.14.138 (feb6b123b7dd). */
        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
        if (r < 0)
                return r;
        if (r == 0) /* cgroupsv1 → workaround not applicable */
                return ret;

        r = cg_kill_items(path, sig, flags, s, log_kill, userdata, "cgroup.threads");
        if (r < 0)
                return r;

        /* Report whether either pass killed anything. */
        return r > 0 || ret > 0;
}
402
int cg_kill_kernel_sigkill(const char *path) {
        /* Kills the cgroup at `path` directly by writing to its cgroup.kill file. This sends SIGKILL to all
         * processes in the cgroup and has the advantage of being completely atomic, unlike cg_kill_items(). */

        _cleanup_free_ char *killfile = NULL;
        int r;

        assert(path);

        /* cgroup.kill only exists on sufficiently new kernels with the unified hierarchy. */
        if (!cg_kill_supported())
                return -EOPNOTSUPP;

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.kill", &killfile);
        if (r < 0)
                return r;

        /* Writing "1" instructs the kernel to SIGKILL the entire subtree atomically. */
        r = write_string_file(killfile, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
        if (r < 0)
                return r;

        return 0;
}
425
/* Like cg_kill(), but recurses into child cgroups. If the kernel's cgroup.kill is usable for this
 * request (plain SIGKILL, no self-exclusion, no tracking set, no logging callback) the whole
 * subtree is killed atomically via that attribute instead of iterating. With CGROUP_REMOVE set
 * the cgroup directory itself is removed afterwards. */
int cg_kill_recursive(
                const char *path,
                int sig,
                CGroupFlags flags,
                Set *s,
                cg_kill_log_func_t log_kill,
                void *userdata) {

        int r, ret;

        assert(path);
        assert(sig >= 0);

        if (sig == SIGKILL && cg_kill_supported() &&
            !FLAGS_SET(flags, CGROUP_IGNORE_SELF) && !s && !log_kill)
                /* ignore CGROUP_SIGCONT, since this is a no-op alongside SIGKILL */
                ret = cg_kill_kernel_sigkill(path);
        else {
                _cleanup_set_free_ Set *allocated_set = NULL;
                _cleanup_closedir_ DIR *d = NULL;

                /* Share one PID tracking set across the whole recursion, so each process is
                 * signalled at most once even if it migrates between child cgroups. */
                if (!s) {
                        s = allocated_set = set_new(NULL);
                        if (!s)
                                return -ENOMEM;
                }

                ret = cg_kill(path, sig, flags, s, log_kill, userdata);

                r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
                if (r < 0) {
                        if (r != -ENOENT)
                                RET_GATHER(ret, r);

                        return ret;
                }

                for (;;) {
                        _cleanup_free_ char *fn = NULL, *p = NULL;

                        r = cg_read_subgroup(d, &fn);
                        if (r < 0) {
                                RET_GATHER(ret, r);
                                break;
                        }
                        if (r == 0)
                                break;

                        p = path_join(empty_to_root(path), fn);
                        if (!p)
                                return -ENOMEM;

                        r = cg_kill_recursive(p, sig, flags, s, log_kill, userdata);
                        /* Keep the first error, or the first positive "killed something" result. */
                        if (r != 0 && ret >= 0)
                                ret = r;
                }
        }

        if (FLAGS_SET(flags, CGROUP_REMOVE)) {
                r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER, path);
                /* Already gone or still busy are both tolerable here. */
                if (!IN_SET(r, -ENOENT, -EBUSY))
                        RET_GATHER(ret, r);
        }

        return ret;
}
492
static const char *controller_to_dirname(const char *controller) {
        assert(controller);

        /* Converts a controller name to the directory name below /sys/fs/cgroup/ we want to mount it
         * to. Effectively, this just cuts off the name= prefixed used for named hierarchies, if it is
         * specified. */

        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                /* The systemd hierarchy goes by a different name depending on whether we run in
                 * hybrid or pure legacy mode. */
                if (cg_hybrid_unified() > 0)
                        controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
                else
                        controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
        }

        /* Drop the "name=" prefix if present, otherwise return the name as-is. */
        return startswith(controller, "name=") ?: controller;
}
509
/* Builds the cgroupsv1 file system path "/sys/fs/cgroup/<controller dir>[/<path>][/<suffix>]",
 * skipping empty components. */
static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **ret) {
        const char *dn;
        char *t;

        assert(ret);
        assert(controller);

        dn = controller_to_dirname(controller);

        if (isempty(path))
                t = isempty(suffix) ? path_join("/sys/fs/cgroup", dn)
                                    : path_join("/sys/fs/cgroup", dn, suffix);
        else
                t = isempty(suffix) ? path_join("/sys/fs/cgroup", dn, path)
                                    : path_join("/sys/fs/cgroup", dn, path, suffix);
        if (!t)
                return -ENOMEM;

        *ret = t;
        return 0;
}
533
/* Builds the cgroupsv2 file system path "/sys/fs/cgroup[/<path>][/<suffix>]", skipping empty
 * components. */
static int join_path_unified(const char *path, const char *suffix, char **ret) {
        char *t;

        assert(ret);

        if (isempty(path))
                t = isempty(suffix) ? strdup("/sys/fs/cgroup")
                                    : path_join("/sys/fs/cgroup", suffix);
        else
                t = isempty(suffix) ? path_join("/sys/fs/cgroup", path)
                                    : path_join("/sys/fs/cgroup", path, suffix);
        if (!t)
                return -ENOMEM;

        *ret = t;
        return 0;
}
553
/* Returns the file system path for the specified controller/cgroup/suffix combination. If
 * 'controller' is NULL, only 'path' and 'suffix' are joined, without the /sys/fs/cgroup prefix.
 * The result is path-simplified before it is returned. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **ret) {
        int r;

        assert(ret);

        if (!controller) {
                char *t;

                /* If no controller is specified, we return the path *below* the controllers, without any
                 * prefix. */

                if (isempty(path) && isempty(suffix))
                        return -EINVAL;

                if (isempty(suffix))
                        t = strdup(path);
                else if (isempty(path))
                        t = strdup(suffix);
                else
                        t = path_join(path, suffix);
                if (!t)
                        return -ENOMEM;

                *ret = path_simplify(t);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        /* Dispatch on hierarchy type: one mount point on v2, per-controller dirs on v1. */
        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r > 0)
                r = join_path_unified(path, suffix, ret);
        else
                r = join_path_legacy(controller, path, suffix, ret);
        if (r < 0)
                return r;

        path_simplify(*ret);
        return 0;
}
597
/* Checks whether a cgroupsv1 controller hierarchy is mounted and (when 'root' is given) writable
 * below the specified root cgroup. */
static int controller_is_v1_accessible(const char *root, const char *controller) {
        const char *cpath, *dn;

        assert(controller);

        dn = controller_to_dirname(controller);

        /* If root is specified, we check that:
         * - possible subcgroup is created at root,
         * - we can modify the hierarchy. */

        cpath = strjoina("/sys/fs/cgroup/", dn, root, root ? "/cgroup.procs" : NULL);
        return laccess(cpath, root ? W_OK : F_OK);
}
612
/* Like cg_get_path(), but first verifies that the specified controller is actually usable on the
 * current system: named hierarchies are refused on the unified hierarchy (-EOPNOTSUPP), and on
 * cgroupsv1 the controller mount must exist. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **ret) {
        int r;

        assert(controller);
        assert(ret);

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r > 0) {
                /* In the unified hierarchy all controllers are considered accessible,
                 * except for the named hierarchies */
                if (startswith(controller, "name="))
                        return -EOPNOTSUPP;
        } else {
                /* Check if the specified controller is actually accessible */
                r = controller_is_v1_accessible(NULL, controller);
                if (r < 0)
                        return r;
        }

        return cg_get_path(controller, path, suffix, ret);
}
639
640 int cg_set_xattr(const char *path, const char *name, const void *value, size_t size, int flags) {
641 _cleanup_free_ char *fs = NULL;
642 int r;
643
644 assert(path);
645 assert(name);
646 assert(value || size <= 0);
647
648 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
649 if (r < 0)
650 return r;
651
652 return RET_NERRNO(setxattr(fs, name, value, size, flags));
653 }
654
655 int cg_get_xattr(const char *path, const char *name, void *value, size_t size) {
656 _cleanup_free_ char *fs = NULL;
657 ssize_t n;
658 int r;
659
660 assert(path);
661 assert(name);
662
663 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
664 if (r < 0)
665 return r;
666
667 n = getxattr(fs, name, value, size);
668 if (n < 0)
669 return -errno;
670
671 return (int) n;
672 }
673
674 int cg_get_xattr_malloc(const char *path, const char *name, char **ret) {
675 _cleanup_free_ char *fs = NULL;
676 int r;
677
678 assert(path);
679 assert(name);
680
681 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
682 if (r < 0)
683 return r;
684
685 return lgetxattr_malloc(fs, name, ret);
686 }
687
688 int cg_get_xattr_bool(const char *path, const char *name) {
689 _cleanup_free_ char *fs = NULL;
690 int r;
691
692 assert(path);
693 assert(name);
694
695 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
696 if (r < 0)
697 return r;
698
699 return getxattr_at_bool(AT_FDCWD, fs, name, /* flags= */ 0);
700 }
701
702 int cg_remove_xattr(const char *path, const char *name) {
703 _cleanup_free_ char *fs = NULL;
704 int r;
705
706 assert(path);
707 assert(name);
708
709 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
710 if (r < 0)
711 return r;
712
713 return RET_NERRNO(removexattr(fs, name));
714 }
715
/* Determines the cgroup path of the specified process in the given hierarchy by parsing
 * /proc/<pid>/cgroup. If 'controller' is NULL the systemd hierarchy is used. Returns -ESRCH if
 * the process does not exist, -ENODATA if no matching hierarchy line was found. */
int cg_pid_get_path(const char *controller, pid_t pid, char **ret_path) {
        _cleanup_fclose_ FILE *f = NULL;
        const char *fs, *controller_str = NULL; /* avoid false maybe-uninitialized warning */
        int unified, r;

        assert(pid >= 0);
        assert(ret_path);

        if (controller) {
                if (!cg_controller_is_valid(controller))
                        return -EINVAL;
        } else
                controller = SYSTEMD_CGROUP_CONTROLLER;

        unified = cg_unified_controller(controller);
        if (unified < 0)
                return unified;
        if (unified == 0) {
                /* On cgroupsv1 the systemd hierarchy shows up under its legacy name. */
                if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
                        controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
                else
                        controller_str = controller;
        }

        fs = procfs_file_alloca(pid, "cgroup");
        r = fopen_unlocked(fs, "re", &f);
        if (r == -ENOENT)
                return -ESRCH; /* no /proc entry → no such process */
        if (r < 0)
                return r;

        /* Each line has the form "<id>:<controller list>:<path>"; on the unified hierarchy the
         * relevant line is "0::<path>". */
        for (;;) {
                _cleanup_free_ char *line = NULL;
                char *e;

                r = read_line(f, LONG_LINE_MAX, &line);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -ENODATA; /* no matching hierarchy line found */

                if (unified) {
                        e = startswith(line, "0:");
                        if (!e)
                                continue;

                        /* Advance to the second colon, the path follows it. */
                        e = strchr(e, ':');
                        if (!e)
                                continue;
                } else {
                        char *l;

                        l = strchr(line, ':');
                        if (!l)
                                continue;

                        l++;
                        e = strchr(l, ':');
                        if (!e)
                                continue;
                        *e = 0;

                        /* Check whether this line covers the controller we are looking for. */
                        assert(controller_str);
                        r = string_contains_word(l, ",", controller_str);
                        if (r < 0)
                                return r;
                        if (r == 0)
                                continue;
                }

                char *path = strdup(e + 1);
                if (!path)
                        return -ENOMEM;

                /* Truncate suffix indicating the process is a zombie */
                e = endswith(path, " (deleted)");
                if (e)
                        *e = 0;

                *ret_path = path;
                return 0;
        }
}
799
/* Like cg_pid_get_path(), but takes a PidRef and re-verifies it afterwards, so the result is
 * guaranteed to belong to the pinned process and not a recycled PID. */
int cg_pidref_get_path(const char *controller, const PidRef *pidref, char **ret_path) {
        _cleanup_free_ char *path = NULL;
        int r;

        assert(ret_path);

        if (!pidref_is_set(pidref))
                return -ESRCH;

        r = cg_pid_get_path(controller, pidref->pid, &path);
        if (r < 0)
                return r;

        /* Before we return the path, make sure the procfs entry for this pid still matches the pidref */
        r = pidref_verify(pidref);
        if (r < 0)
                return r;

        *ret_path = TAKE_PTR(path);
        return 0;
}
821
/* Installs 'agent' as the release agent of the specified cgroupsv1 hierarchy and makes sure
 * notify_on_release is enabled. Returns 1 if we switched notification on, 0 if everything was
 * already configured, -EEXIST if a different agent is installed, -EOPNOTSUPP on the unified
 * hierarchy (which has no release agent concept). */
int cg_install_release_agent(const char *controller, const char *agent) {
        _cleanup_free_ char *fs = NULL, *contents = NULL;
        const char *sc;
        int r;

        assert(agent);

        r = cg_unified_controller(controller);
        if (r < 0)
                return r;
        if (r > 0) /* doesn't apply to unified hierarchy */
                return -EOPNOTSUPP;

        r = cg_get_path(controller, NULL, "release_agent", &fs);
        if (r < 0)
                return r;

        r = read_one_line_file(fs, &contents);
        if (r < 0)
                return r;

        /* Install our agent, but only if no (different) agent is installed yet. */
        sc = strstrip(contents);
        if (isempty(sc)) {
                r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
                if (r < 0)
                        return r;
        } else if (!path_equal(sc, agent))
                return -EEXIST;

        /* Now turn on release notification, if it isn't on already. */
        fs = mfree(fs);
        r = cg_get_path(controller, NULL, "notify_on_release", &fs);
        if (r < 0)
                return r;

        contents = mfree(contents);
        r = read_one_line_file(fs, &contents);
        if (r < 0)
                return r;

        sc = strstrip(contents);
        if (streq(sc, "0")) {
                r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
                if (r < 0)
                        return r;

                return 1;
        }

        /* The attribute should only ever contain "0" or "1"; anything else is an I/O oddity. */
        if (!streq(sc, "1"))
                return -EIO;

        return 0;
}
875
/* Reverses cg_install_release_agent(): switches off release notification and clears the release
 * agent of the specified cgroupsv1 hierarchy. -EOPNOTSUPP on the unified hierarchy. */
int cg_uninstall_release_agent(const char *controller) {
        _cleanup_free_ char *fs = NULL;
        int r;

        r = cg_unified_controller(controller);
        if (r < 0)
                return r;
        if (r > 0) /* Doesn't apply to unified hierarchy */
                return -EOPNOTSUPP;

        /* Turn notification off first, so no agent fires while we clear it. */
        r = cg_get_path(controller, NULL, "notify_on_release", &fs);
        if (r < 0)
                return r;

        r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
        if (r < 0)
                return r;

        fs = mfree(fs);

        /* Then clear the agent path itself. */
        r = cg_get_path(controller, NULL, "release_agent", &fs);
        if (r < 0)
                return r;

        r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER);
        if (r < 0)
                return r;

        return 0;
}
906
907 int cg_is_empty(const char *controller, const char *path) {
908 _cleanup_fclose_ FILE *f = NULL;
909 pid_t pid;
910 int r;
911
912 assert(path);
913
914 r = cg_enumerate_processes(controller, path, &f);
915 if (r == -ENOENT)
916 return true;
917 if (r < 0)
918 return r;
919
920 r = cg_read_pid(f, &pid);
921 if (r < 0)
922 return r;
923
924 return r == 0;
925 }
926
/* Returns whether the specified cgroup and all of its descendants contain no processes. On the
 * unified hierarchy this is answered cheaply via the "populated" key of cgroup.events; on v1 it
 * recurses through the subtree. The root cgroup always counts as populated. */
int cg_is_empty_recursive(const char *controller, const char *path) {
        int r;

        assert(path);

        /* The root cgroup is always populated */
        if (controller && empty_or_root(path))
                return false;

        r = cg_unified_controller(controller);
        if (r < 0)
                return r;
        if (r > 0) {
                _cleanup_free_ char *t = NULL;

                /* On the unified hierarchy we can check empty state
                 * via the "populated" attribute of "cgroup.events". */

                r = cg_read_event(controller, path, "populated", &t);
                if (r == -ENOENT)
                        return true; /* cgroup is gone → empty */
                if (r < 0)
                        return r;

                return streq(t, "0");
        } else {
                _cleanup_closedir_ DIR *d = NULL;
                char *fn;

                /* Check this cgroup itself first… */
                r = cg_is_empty(controller, path);
                if (r <= 0)
                        return r;

                /* …then recurse into all children. */
                r = cg_enumerate_subgroups(controller, path, &d);
                if (r == -ENOENT)
                        return true;
                if (r < 0)
                        return r;

                while ((r = cg_read_subgroup(d, &fn)) > 0) {
                        _cleanup_free_ char *p = NULL;

                        p = path_join(path, fn);
                        free(fn);
                        if (!p)
                                return -ENOMEM;

                        r = cg_is_empty_recursive(controller, p);
                        if (r <= 0)
                                return r;
                }
                if (r < 0)
                        return r;

                return true;
        }
}
984
/* Splits a cgroup specification into its controller and path parts. Accepted forms:
 *   "/some/path"            → no controller, absolute (normalized) path
 *   "controller:/some/path" → both parts
 *   "controller"            → controller only
 * Either output parameter may be NULL if the caller is not interested in that part. */
int cg_split_spec(const char *spec, char **ret_controller, char **ret_path) {
        _cleanup_free_ char *controller = NULL, *path = NULL;
        int r;

        assert(spec);

        if (*spec == '/') {
                /* Plain absolute path, no controller part. */
                if (!path_is_normalized(spec))
                        return -EINVAL;

                if (ret_path) {
                        r = path_simplify_alloc(spec, &path);
                        if (r < 0)
                                return r;
                }

        } else {
                const char *e;

                e = strchr(spec, ':');
                if (e) {
                        /* "controller:path" form. */
                        controller = strndup(spec, e-spec);
                        if (!controller)
                                return -ENOMEM;
                        if (!cg_controller_is_valid(controller))
                                return -EINVAL;

                        if (!isempty(e + 1)) {
                                path = strdup(e+1);
                                if (!path)
                                        return -ENOMEM;

                                if (!path_is_normalized(path) ||
                                    !path_is_absolute(path))
                                        return -EINVAL;

                                path_simplify(path);
                        }

                } else {
                        /* Controller name only, no path. */
                        if (!cg_controller_is_valid(spec))
                                return -EINVAL;

                        if (ret_controller) {
                                controller = strdup(spec);
                                if (!controller)
                                        return -ENOMEM;
                        }
                }
        }

        if (ret_controller)
                *ret_controller = TAKE_PTR(controller);
        if (ret_path)
                *ret_path = TAKE_PTR(path);
        return 0;
}
1042
1043 int cg_mangle_path(const char *path, char **ret) {
1044 _cleanup_free_ char *c = NULL, *p = NULL;
1045 int r;
1046
1047 assert(path);
1048 assert(ret);
1049
1050 /* First, check if it already is a filesystem path */
1051 if (path_startswith(path, "/sys/fs/cgroup"))
1052 return path_simplify_alloc(path, ret);
1053
1054 /* Otherwise, treat it as cg spec */
1055 r = cg_split_spec(path, &c, &p);
1056 if (r < 0)
1057 return r;
1058
1059 return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, ret);
1060 }
1061
/* Determines the cgroup root path by looking at PID 1's cgroup and chopping off the well-known
 * manager suffixes. The caller owns the returned string. */
int cg_get_root_path(char **ret_path) {
        char *p, *e;
        int r;

        assert(ret_path);

        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
        if (r < 0)
                return r;

        /* PID 1 lives in init.scope (or older equivalents); strip that to get the root. */
        e = endswith(p, "/" SPECIAL_INIT_SCOPE);
        if (!e)
                e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
        if (!e)
                e = endswith(p, "/system"); /* even more legacy */
        if (e)
                *e = 0;

        *ret_path = p;
        return 0;
}
1083
1084 int cg_shift_path(const char *cgroup, const char *root, const char **ret_shifted) {
1085 _cleanup_free_ char *rt = NULL;
1086 char *p;
1087 int r;
1088
1089 assert(cgroup);
1090 assert(ret_shifted);
1091
1092 if (!root) {
1093 /* If the root was specified let's use that, otherwise
1094 * let's determine it from PID 1 */
1095
1096 r = cg_get_root_path(&rt);
1097 if (r < 0)
1098 return r;
1099
1100 root = rt;
1101 }
1102
1103 p = path_startswith(cgroup, root);
1104 if (p && p > cgroup)
1105 *ret_shifted = p - 1;
1106 else
1107 *ret_shifted = cgroup;
1108
1109 return 0;
1110 }
1111
1112 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **ret_cgroup) {
1113 _cleanup_free_ char *raw = NULL;
1114 const char *c;
1115 int r;
1116
1117 assert(pid >= 0);
1118 assert(ret_cgroup);
1119
1120 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1121 if (r < 0)
1122 return r;
1123
1124 r = cg_shift_path(raw, root, &c);
1125 if (r < 0)
1126 return r;
1127
1128 if (c == raw)
1129 *ret_cgroup = TAKE_PTR(raw);
1130 else {
1131 char *n;
1132
1133 n = strdup(c);
1134 if (!n)
1135 return -ENOMEM;
1136
1137 *ret_cgroup = n;
1138 }
1139
1140 return 0;
1141 }
1142
/* Decodes the first path component of 'cgroup' as a unit name. Returns -ENXIO if the component is
 * too short to be a unit name or does not unescape to a valid plain/instance unit name. */
int cg_path_decode_unit(const char *cgroup, char **ret_unit) {
        char *c, *s;
        size_t n;

        assert(cgroup);
        assert(ret_unit);

        /* Length of the first component; anything shorter than 3 chars cannot be a unit name. */
        n = strcspn(cgroup, "/");
        if (n < 3)
                return -ENXIO;

        c = strndupa_safe(cgroup, n);
        c = cg_unescape(c);

        if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
                return -ENXIO;

        s = strdup(c);
        if (!s)
                return -ENOMEM;

        *ret_unit = s;
        return 0;
}
1167
/* Returns true if the first 'n' bytes of 'p' unescape to a valid ".slice" unit name. */
static bool valid_slice_name(const char *p, size_t n) {

        if (!p)
                return false;

        if (n < STRLEN("x.slice"))
                return false;

        /* Cheap suffix check first (6 == strlen(".slice")), then validate the full name. */
        if (memcmp(p + n - 6, ".slice", 6) == 0) {
                char buf[n+1], *c;

                memcpy(buf, p, n);
                buf[n] = 0;

                c = cg_unescape(buf);

                return unit_name_is_valid(c, UNIT_NAME_PLAIN);
        }

        return false;
}
1189
static const char *skip_slices(const char *p) {
        assert(p);

        /* Skips over all slice assignments */

        for (;;) {
                size_t n;

                /* Skip any leading slashes, then measure the next path component. */
                p += strspn(p, "/");

                n = strcspn(p, "/");
                if (!valid_slice_name(p, n))
                        return p; /* first non-slice component (or end of string) */

                p += n;
        }
}
1207
1208 int cg_path_get_unit(const char *path, char **ret) {
1209 _cleanup_free_ char *unit = NULL;
1210 const char *e;
1211 int r;
1212
1213 assert(path);
1214 assert(ret);
1215
1216 e = skip_slices(path);
1217
1218 r = cg_path_decode_unit(e, &unit);
1219 if (r < 0)
1220 return r;
1221
1222 /* We skipped over the slices, don't accept any now */
1223 if (endswith(unit, ".slice"))
1224 return -ENXIO;
1225
1226 *ret = TAKE_PTR(unit);
1227 return 0;
1228 }
1229
/* Returns a copy of the cgroup path truncated right after the unit name component (the slice
 * prefix is kept). Returns -ENXIO if no valid unit name follows the slices. */
int cg_path_get_unit_path(const char *path, char **ret) {
        _cleanup_free_ char *path_copy = NULL;
        char *unit_name;

        assert(path);
        assert(ret);

        path_copy = strdup(path);
        if (!path_copy)
                return -ENOMEM;

        /* Locate the first non-slice component and cut the string right after it. */
        unit_name = (char *)skip_slices(path_copy);
        unit_name[strcspn(unit_name, "/")] = 0;

        if (!unit_name_is_valid(cg_unescape(unit_name), UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
                return -ENXIO;

        *ret = TAKE_PTR(path_copy);

        return 0;
}
1251
1252 int cg_pid_get_unit(pid_t pid, char **ret_unit) {
1253 _cleanup_free_ char *cgroup = NULL;
1254 int r;
1255
1256 assert(ret_unit);
1257
1258 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1259 if (r < 0)
1260 return r;
1261
1262 return cg_path_get_unit(cgroup, ret_unit);
1263 }
1264
/* Like cg_pid_get_unit(), but takes a PidRef and re-verifies it afterwards, guarding against PID
 * recycling between the lookup and the return. */
int cg_pidref_get_unit(const PidRef *pidref, char **ret) {
        _cleanup_free_ char *unit = NULL;
        int r;

        assert(ret);

        if (!pidref_is_set(pidref))
                return -ESRCH;

        r = cg_pid_get_unit(pidref->pid, &unit);
        if (r < 0)
                return r;

        /* Make sure the process is still the one the pidref pinned. */
        r = pidref_verify(pidref);
        if (r < 0)
                return r;

        *ret = TAKE_PTR(unit);
        return 0;
}
1285
/**
 * Skip session-*.scope, but require it to be there.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        /* Measure the next path component; it must be long enough for "session-x.scope". */
        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        /* 8 == strlen("session-"), 6 == strlen(".scope") */
        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (!session_id_valid(buf))
                        return NULL;

                /* Advance past the scope component and any following slashes. */
                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1322
/**
 * Skip user@*.service, but require it to be there.
 */
static const char *skip_user_manager(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        /* Measure the next path component; it must be long enough for "user@x.service". */
        n = strcspn(p, "/");
        if (n < STRLEN("user@x.service"))
                return NULL;

        /* 5 == strlen("user@"), 8 == strlen(".service") */
        if (memcmp(p, "user@", 5) == 0 && memcmp(p + n - 8, ".service", 8) == 0) {
                char buf[n - 5 - 8 + 1];

                memcpy(buf, p + 5, n - 5 - 8);
                buf[n - 5 - 8] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(buf, NULL) < 0)
                        return NULL;

                /* Advance past the service component and any following slashes. */
                p += n;
                p += strspn(p, "/");

                return p;
        }

        return NULL;
}
1360
/* Skips the user-session prefix of a cgroup path: any leading slices followed by either the user
 * manager service or a session scope. Returns NULL if neither follows the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *t;

        assert(path);

        /* Strip any leading slices first. */
        after_slices = skip_slices(path);

        /* Prefer the user manager service; otherwise fall back to a session scope. */
        t = skip_user_manager(after_slices);
        return t ?: skip_session(after_slices);
}
1377
/* Extracts the name of the user unit a cgroup path refers to. Fails with -ENXIO if the path has
 * no user-session prefix. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);
        if (!rest)
                return -ENXIO;

        /* And from here on it looks pretty much the same as for a system unit, hence let's use the same
         * parser. */
        return cg_path_get_unit(rest, ret);
}
1392
1393 int cg_pid_get_user_unit(pid_t pid, char **ret_unit) {
1394 _cleanup_free_ char *cgroup = NULL;
1395 int r;
1396
1397 assert(ret_unit);
1398
1399 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1400 if (r < 0)
1401 return r;
1402
1403 return cg_path_get_user_unit(cgroup, ret_unit);
1404 }
1405
1406 int cg_path_get_machine_name(const char *path, char **ret_machine) {
1407 _cleanup_free_ char *u = NULL;
1408 const char *sl;
1409 int r;
1410
1411 r = cg_path_get_unit(path, &u);
1412 if (r < 0)
1413 return r;
1414
1415 sl = strjoina("/run/systemd/machines/unit:", u);
1416 return readlink_malloc(sl, ret_machine);
1417 }
1418
1419 int cg_pid_get_machine_name(pid_t pid, char **ret_machine) {
1420 _cleanup_free_ char *cgroup = NULL;
1421 int r;
1422
1423 assert(ret_machine);
1424
1425 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1426 if (r < 0)
1427 return r;
1428
1429 return cg_path_get_machine_name(cgroup, ret_machine);
1430 }
1431
1432 int cg_path_get_cgroupid(const char *path, uint64_t *ret) {
1433 cg_file_handle fh = CG_FILE_HANDLE_INIT;
1434 int mnt_id = -1;
1435
1436 assert(path);
1437 assert(ret);
1438
1439 /* This is cgroupfs so we know the size of the handle, thus no need to loop around like
1440 * name_to_handle_at_loop() does in mountpoint-util.c */
1441 if (name_to_handle_at(AT_FDCWD, path, &fh.file_handle, &mnt_id, 0) < 0)
1442 return -errno;
1443
1444 *ret = CG_FILE_HANDLE_CGROUPID(fh);
1445 return 0;
1446 }
1447
1448 int cg_path_get_session(const char *path, char **ret_session) {
1449 _cleanup_free_ char *unit = NULL;
1450 char *start, *end;
1451 int r;
1452
1453 assert(path);
1454
1455 r = cg_path_get_unit(path, &unit);
1456 if (r < 0)
1457 return r;
1458
1459 start = startswith(unit, "session-");
1460 if (!start)
1461 return -ENXIO;
1462 end = endswith(start, ".scope");
1463 if (!end)
1464 return -ENXIO;
1465
1466 *end = 0;
1467 if (!session_id_valid(start))
1468 return -ENXIO;
1469
1470 if (ret_session) {
1471 char *rr;
1472
1473 rr = strdup(start);
1474 if (!rr)
1475 return -ENOMEM;
1476
1477 *ret_session = rr;
1478 }
1479
1480 return 0;
1481 }
1482
1483 int cg_pid_get_session(pid_t pid, char **ret_session) {
1484 _cleanup_free_ char *cgroup = NULL;
1485 int r;
1486
1487 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1488 if (r < 0)
1489 return r;
1490
1491 return cg_path_get_session(cgroup, ret_session);
1492 }
1493
1494 int cg_path_get_owner_uid(const char *path, uid_t *ret_uid) {
1495 _cleanup_free_ char *slice = NULL;
1496 char *start, *end;
1497 int r;
1498
1499 assert(path);
1500
1501 r = cg_path_get_slice(path, &slice);
1502 if (r < 0)
1503 return r;
1504
1505 start = startswith(slice, "user-");
1506 if (!start)
1507 return -ENXIO;
1508
1509 end = endswith(start, ".slice");
1510 if (!end)
1511 return -ENXIO;
1512
1513 *end = 0;
1514 if (parse_uid(start, ret_uid) < 0)
1515 return -ENXIO;
1516
1517 return 0;
1518 }
1519
1520 int cg_pid_get_owner_uid(pid_t pid, uid_t *ret_uid) {
1521 _cleanup_free_ char *cgroup = NULL;
1522 int r;
1523
1524 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1525 if (r < 0)
1526 return r;
1527
1528 return cg_path_get_owner_uid(cgroup, ret_uid);
1529 }
1530
int cg_path_get_slice(const char *p, char **ret_slice) {
        const char *e = NULL;

        assert(p);
        assert(ret_slice);

        /* Finds the right-most slice unit from the beginning, but
         * stops before we come to the first non-slice unit. */

        for (;;) {
                size_t n;

                /* Skip over (possibly repeated) '/' separators */
                p += strspn(p, "/");

                n = strcspn(p, "/");
                if (!valid_slice_name(p, n)) {

                        /* No slice component encountered at all? Then report the root slice. */
                        if (!e) {
                                char *s;

                                s = strdup(SPECIAL_ROOT_SLICE);
                                if (!s)
                                        return -ENOMEM;

                                *ret_slice = s;
                                return 0;
                        }

                        /* Decode the deepest slice component we remembered */
                        return cg_path_decode_unit(e, ret_slice);
                }

                /* Remember this slice component and keep scanning deeper */
                e = p;
                p += n;
        }
}
1566
1567 int cg_pid_get_slice(pid_t pid, char **ret_slice) {
1568 _cleanup_free_ char *cgroup = NULL;
1569 int r;
1570
1571 assert(ret_slice);
1572
1573 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1574 if (r < 0)
1575 return r;
1576
1577 return cg_path_get_slice(cgroup, ret_slice);
1578 }
1579
int cg_path_get_user_slice(const char *p, char **ret_slice) {
        const char *rest;

        assert(p);
        assert(ret_slice);

        /* Strip the user session/manager prefix; without one this is not a user path. */
        rest = skip_user_prefix(p);
        if (!rest)
                return -ENXIO;

        /* The remainder parses just like a system slice path. */
        return cg_path_get_slice(rest, ret_slice);
}
1593
1594 int cg_pid_get_user_slice(pid_t pid, char **ret_slice) {
1595 _cleanup_free_ char *cgroup = NULL;
1596 int r;
1597
1598 assert(ret_slice);
1599
1600 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1601 if (r < 0)
1602 return r;
1603
1604 return cg_path_get_user_slice(cgroup, ret_slice);
1605 }
1606
bool cg_needs_escape(const char *p) {

        /* Checks if the specified path is a valid cgroup name by our rules, or if it must be escaped. Note
         * that we consider escaped cgroup names invalid here, as they need to be escaped a second time if
         * they shall be used. Also note that various names cannot be made valid by escaping even if we
         * return true here (because too long, or contain the forbidden character "/"). */

        if (!filename_is_valid(p))
                return true;

        /* '_' is our escape character (see cg_escape()), '.' would hide the file */
        if (IN_SET(p[0], '_', '.'))
                return true;

        /* These attribute files exist in every cgroup directory (cgroup v1) */
        if (STR_IN_SET(p, "notify_on_release", "release_agent", "tasks"))
                return true;

        /* All "cgroup.*" names are claimed by the kernel */
        if (startswith(p, "cgroup."))
                return true;

        /* "<controller>.<something>" would clash with controller attribute files */
        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                const char *q;

                q = startswith(p, cgroup_controller_to_string(c));
                if (!q)
                        continue;

                if (q[0] == '.')
                        return true;
        }

        return false;
}
1639
1640 int cg_escape(const char *p, char **ret) {
1641 _cleanup_free_ char *n = NULL;
1642
1643 /* This implements very minimal escaping for names to be used as file names in the cgroup tree: any
1644 * name which might conflict with a kernel name or is prefixed with '_' is prefixed with a '_'. That
1645 * way, when reading cgroup names it is sufficient to remove a single prefixing underscore if there
1646 * is one. */
1647
1648 /* The return value of this function (unlike cg_unescape()) needs free()! */
1649
1650 if (cg_needs_escape(p)) {
1651 n = strjoin("_", p);
1652 if (!n)
1653 return -ENOMEM;
1654
1655 if (!filename_is_valid(n)) /* became invalid due to the prefixing? Or contained things like a slash that cannot be fixed by prefixing? */
1656 return -EINVAL;
1657 } else {
1658 n = strdup(p);
1659 if (!n)
1660 return -ENOMEM;
1661 }
1662
1663 *ret = TAKE_PTR(n);
1664 return 0;
1665 }
1666
char *cg_unescape(const char *p) {
        assert(p);

        /* Drops the single '_' prefix added by cg_escape(), if present. Unlike
         * cg_escape()'s result, the returned pointer aliases the input and must
         * NOT be freed. */

        return (char*) (*p == '_' ? p + 1 : p);
}
1678
1679 #define CONTROLLER_VALID \
1680 DIGITS LETTERS \
1681 "_"
1682
bool cg_controller_is_valid(const char *p) {
        const char *t, *s;

        /* Checks whether the specified string is a valid cgroup controller name,
         * optionally with a "name=" prefix (named cgroup v1 hierarchies). */

        if (!p)
                return false;

        /* Our own pseudo-controller is always acceptable */
        if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
                return true;

        s = startswith(p, "name=");
        if (s)
                p = s;

        /* Must be non-empty; a leading '_' is reserved as our escape character */
        if (IN_SET(*p, 0, '_'))
                return false;

        /* Only digits, letters and '_' are permitted (see CONTROLLER_VALID) */
        for (t = p; *t; t++)
                if (!strchr(CONTROLLER_VALID, *t))
                        return false;

        /* Cap the length at NAME_MAX */
        if (t - p > NAME_MAX)
                return false;

        return true;
}
1708
int cg_slice_to_path(const char *unit, char **ret) {
        _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
        const char *dash;
        int r;

        assert(unit);
        assert(ret);

        /* Converts a slice unit name into its cgroup path: each '-' in the name
         * opens another nesting level, e.g. "foo-bar.slice" becomes
         * "foo.slice/foo-bar.slice". */

        if (streq(unit, SPECIAL_ROOT_SLICE)) {
                char *x;

                /* The root slice maps to the cgroup root, i.e. the empty path */
                x = strdup("");
                if (!x)
                        return -ENOMEM;
                *ret = x;
                return 0;
        }

        if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
                return -EINVAL;

        if (!endswith(unit, ".slice"))
                return -EINVAL;

        r = unit_name_to_prefix(unit, &p);
        if (r < 0)
                return r;

        dash = strchr(p, '-');

        /* Don't allow initial dashes */
        if (dash == p)
                return -EINVAL;

        /* For each dash-delimited prefix, append "<prefix>.slice/" to the result */
        while (dash) {
                _cleanup_free_ char *escaped = NULL;
                char n[dash - p + sizeof(".slice")]; /* VLA: prefix + ".slice" + NUL */

#if HAS_FEATURE_MEMORY_SANITIZER
                /* msan doesn't instrument stpncpy, so it thinks
                 * n is later used uninitialized:
                 * https://github.com/google/sanitizers/issues/926
                 */
                zero(n);
#endif

                /* Don't allow trailing or double dashes */
                if (IN_SET(dash[1], 0, '-'))
                        return -EINVAL;

                strcpy(stpncpy(n, p, dash - p), ".slice");
                if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
                        return -EINVAL;

                r = cg_escape(n, &escaped);
                if (r < 0)
                        return r;

                if (!strextend(&s, escaped, "/"))
                        return -ENOMEM;

                dash = strchr(dash+1, '-');
        }

        /* Finally, append the (escaped) full unit name itself */
        r = cg_escape(unit, &e);
        if (r < 0)
                return r;

        if (!strextend(&s, e))
                return -ENOMEM;

        *ret = TAKE_PTR(s);
        return 0;
}
1783
1784 int cg_is_threaded(const char *path) {
1785 _cleanup_free_ char *fs = NULL, *contents = NULL;
1786 _cleanup_strv_free_ char **v = NULL;
1787 int r;
1788
1789 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.type", &fs);
1790 if (r < 0)
1791 return r;
1792
1793 r = read_full_virtual_file(fs, &contents, NULL);
1794 if (r == -ENOENT)
1795 return false; /* Assume no. */
1796 if (r < 0)
1797 return r;
1798
1799 v = strv_split(contents, NULL);
1800 if (!v)
1801 return -ENOMEM;
1802
1803 /* If the cgroup is in the threaded mode, it contains "threaded".
1804 * If one of the parents or siblings is in the threaded mode, it may contain "invalid". */
1805 return strv_contains(v, "threaded") || strv_contains(v, "invalid");
1806 }
1807
1808 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1809 _cleanup_free_ char *p = NULL;
1810 int r;
1811
1812 r = cg_get_path(controller, path, attribute, &p);
1813 if (r < 0)
1814 return r;
1815
1816 return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
1817 }
1818
1819 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1820 _cleanup_free_ char *p = NULL;
1821 int r;
1822
1823 r = cg_get_path(controller, path, attribute, &p);
1824 if (r < 0)
1825 return r;
1826
1827 return read_one_line_file(p, ret);
1828 }
1829
1830 int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret) {
1831 _cleanup_free_ char *value = NULL;
1832 uint64_t v;
1833 int r;
1834
1835 assert(ret);
1836
1837 r = cg_get_attribute(controller, path, attribute, &value);
1838 if (r == -ENOENT)
1839 return -ENODATA;
1840 if (r < 0)
1841 return r;
1842
1843 if (streq(value, "max")) {
1844 *ret = CGROUP_LIMIT_MAX;
1845 return 0;
1846 }
1847
1848 r = safe_atou64(value, &v);
1849 if (r < 0)
1850 return r;
1851
1852 *ret = v;
1853 return 0;
1854 }
1855
1856 int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret) {
1857 _cleanup_free_ char *value = NULL;
1858 int r;
1859
1860 assert(ret);
1861
1862 r = cg_get_attribute(controller, path, attribute, &value);
1863 if (r == -ENOENT)
1864 return -ENODATA;
1865 if (r < 0)
1866 return r;
1867
1868 r = parse_boolean(value);
1869 if (r < 0)
1870 return r;
1871
1872 *ret = r;
1873 return 0;
1874 }
1875
1876 int cg_get_owner(const char *path, uid_t *ret_uid) {
1877 _cleanup_free_ char *f = NULL;
1878 struct stat stats;
1879 int r;
1880
1881 assert(ret_uid);
1882
1883 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &f);
1884 if (r < 0)
1885 return r;
1886
1887 if (stat(f, &stats) < 0)
1888 return -errno;
1889
1890 r = stat_verify_directory(&stats);
1891 if (r < 0)
1892 return r;
1893
1894 *ret_uid = stats.st_uid;
1895 return 0;
1896 }
1897
int cg_get_keyed_attribute_full(
                const char *controller,
                const char *path,
                const char *attribute,
                char **keys,
                char **ret_values,
                CGroupKeyMode mode) {

        _cleanup_free_ char *filename = NULL, *contents = NULL;
        const char *p;
        size_t n, i, n_done = 0;
        char **v;
        int r;

        /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
         * all keys to retrieve. The 'ret_values' parameter should be passed as a pre-allocated char* array with the
         * same number of entries as 'keys'. On success each entry will be set to the value of the matching key.
         *
         * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. If mode
         * is set to CG_KEY_MODE_GRACEFUL we ignore missing keys and return those that were parsed successfully. */

        r = cg_get_path(controller, path, attribute, &filename);
        if (r < 0)
                return r;

        r = read_full_file(filename, &contents, NULL);
        if (r < 0)
                return r;

        n = strv_length(keys);
        if (n == 0) /* No keys to retrieve? That's easy, we are done then */
                return 0;

        /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
        v = newa0(char*, n);

        /* Walk the file line by line; each line starts with a key word */
        for (p = contents; *p;) {
                const char *w = NULL;

                /* Find the first not-yet-filled key that this line starts with */
                for (i = 0; i < n; i++)
                        if (!v[i]) {
                                w = first_word(p, keys[i]);
                                if (w)
                                        break;
                        }

                if (w) {
                        size_t l;

                        /* Everything up to the end of the line is the value */
                        l = strcspn(w, NEWLINE);
                        v[i] = strndup(w, l);
                        if (!v[i]) {
                                r = -ENOMEM;
                                goto fail;
                        }

                        n_done++;
                        if (n_done >= n)
                                goto done;

                        p = w + l;
                } else
                        p += strcspn(p, NEWLINE);

                p += strspn(p, NEWLINE);
        }

        if (mode & CG_KEY_MODE_GRACEFUL)
                goto done;

        r = -ENXIO;

fail:
        /* Release whatever we collected so far */
        free_many_charp(v, n);
        return r;

done:
        memcpy(ret_values, v, sizeof(char*) * n);
        if (mode & CG_KEY_MODE_GRACEFUL)
                return n_done;

        return 0;
}
1981
int cg_mask_to_string(CGroupMask mask, char **ret) {
        _cleanup_free_ char *s = NULL;
        bool space = false;
        CGroupController c;
        size_t n = 0;

        assert(ret);

        /* Renders the controller mask as a space-separated list of controller
         * names. Note that an empty mask yields NULL, not an empty string. */

        if (mask == 0) {
                *ret = NULL;
                return 0;
        }

        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                const char *k;
                size_t l;

                if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
                        continue;

                k = cgroup_controller_to_string(c);
                l = strlen(k);

                /* Reserve room for separator (if any) + name + trailing NUL */
                if (!GREEDY_REALLOC(s, n + space + l + 1))
                        return -ENOMEM;

                if (space)
                        s[n] = ' ';
                memcpy(s + n + space, k, l);
                n += space + l;

                space = true; /* all further entries get a space separator */
        }

        assert(s);

        s[n] = 0;
        *ret = TAKE_PTR(s);

        return 0;
}
2023
2024 int cg_mask_from_string(const char *value, CGroupMask *ret) {
2025 CGroupMask m = 0;
2026
2027 assert(ret);
2028 assert(value);
2029
2030 for (;;) {
2031 _cleanup_free_ char *n = NULL;
2032 CGroupController v;
2033 int r;
2034
2035 r = extract_first_word(&value, &n, NULL, 0);
2036 if (r < 0)
2037 return r;
2038 if (r == 0)
2039 break;
2040
2041 v = cgroup_controller_from_string(n);
2042 if (v < 0)
2043 continue;
2044
2045 m |= CGROUP_CONTROLLER_TO_MASK(v);
2046 }
2047
2048 *ret = m;
2049 return 0;
2050 }
2051
int cg_mask_supported_subtree(const char *root, CGroupMask *ret) {
        CGroupMask mask;
        int r;

        /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense of and that
         * are actually accessible. Only covers real controllers, i.e. not the CGROUP_CONTROLLER_BPF_xyz
         * pseudo-controllers. */

        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r > 0) {
                _cleanup_free_ char *controllers = NULL, *path = NULL;

                /* In the unified hierarchy we can read the supported and accessible controllers from
                 * the top-level cgroup attribute */

                r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
                if (r < 0)
                        return r;

                r = read_one_line_file(path, &controllers);
                if (r < 0)
                        return r;

                r = cg_mask_from_string(controllers, &mask);
                if (r < 0)
                        return r;

                /* Mask controllers that are not supported in unified hierarchy. */
                mask &= CGROUP_MASK_V2;

        } else {
                CGroupController c;

                /* In the legacy hierarchy, we check which hierarchies are accessible. */

                mask = 0;
                for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                        CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                        const char *n;

                        /* Skip controllers that don't exist in v1 (the pseudo-controllers) */
                        if (!FLAGS_SET(CGROUP_MASK_V1, bit))
                                continue;

                        n = cgroup_controller_to_string(c);
                        if (controller_is_v1_accessible(root, n) >= 0)
                                mask |= bit;
                }
        }

        *ret = mask;
        return 0;
}
2106
2107 int cg_mask_supported(CGroupMask *ret) {
2108 _cleanup_free_ char *root = NULL;
2109 int r;
2110
2111 r = cg_get_root_path(&root);
2112 if (r < 0)
2113 return r;
2114
2115 return cg_mask_supported_subtree(root, ret);
2116 }
2117
int cg_kernel_controllers(Set **ret) {
        _cleanup_set_free_ Set *controllers = NULL;
        _cleanup_fclose_ FILE *f = NULL;
        int r;

        assert(ret);

        /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
         * and controllers that aren't currently accessible (because not mounted). This does not include "name="
         * pseudo-controllers. */

        r = fopen_unlocked("/proc/cgroups", "re", &f);
        if (r == -ENOENT) {
                /* No /proc/cgroups? Then return an empty (NULL) set. */
                *ret = NULL;
                return 0;
        }
        if (r < 0)
                return r;

        /* Ignore the header line */
        (void) read_line(f, SIZE_MAX, NULL);

        for (;;) {
                _cleanup_free_ char *controller = NULL;
                int enabled = 0;

                /* Parse "<name> <int> <int> <enabled>" per line; only the name
                 * and the enabled flag are of interest. */
                errno = 0;
                if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {

                        if (feof(f))
                                break;

                        if (ferror(f))
                                return errno_or_else(EIO);

                        /* Neither EOF nor an I/O error: the file is malformed */
                        return -EBADMSG;
                }

                if (!enabled)
                        continue;

                if (!cg_controller_is_valid(controller))
                        return -EBADMSG;

                r = set_ensure_consume(&controllers, &string_hash_ops_free, TAKE_PTR(controller));
                if (r < 0)
                        return r;
        }

        *ret = TAKE_PTR(controllers);

        return 0;
}
2171
2172 /* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on
2173 * /sys/fs/cgroup/systemd. This unfortunately broke other tools (such as docker) which expected the v1
2174 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts v2 on
2175 * /sys/fs/cgroup/unified and maintains "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility
2176 * with other tools.
2177 *
2178 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep
2179 * cgroup v2 process management but disable the compat dual layout, we return true on
2180 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and false on cg_hybrid_unified().
2181 */
static thread_local bool unified_systemd_v232; /* set by cg_unified_cached(): true when the v232 layout (cgroup2 mounted directly on /sys/fs/cgroup/systemd) was detected */
2183
2184 int cg_unified_cached(bool flush) {
2185 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2186
2187 struct statfs fs;
2188
2189 /* Checks if we support the unified hierarchy. Returns an
2190 * error when the cgroup hierarchies aren't mounted yet or we
2191 * have any other trouble determining if the unified hierarchy
2192 * is supported. */
2193
2194 if (flush)
2195 unified_cache = CGROUP_UNIFIED_UNKNOWN;
2196 else if (unified_cache >= CGROUP_UNIFIED_NONE)
2197 return unified_cache;
2198
2199 if (statfs("/sys/fs/cgroup/", &fs) < 0)
2200 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
2201
2202 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2203 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
2204 unified_cache = CGROUP_UNIFIED_ALL;
2205 } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2206 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
2207 F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2208 log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2209 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2210 unified_systemd_v232 = false;
2211 } else {
2212 if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) {
2213 if (errno == ENOENT) {
2214 /* Some other software may have set up /sys/fs/cgroup in a configuration we do not recognize. */
2215 log_debug_errno(errno, "Unsupported cgroupsv1 setup detected: name=systemd hierarchy not found.");
2216 return -ENOMEDIUM;
2217 }
2218 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
2219 }
2220
2221 if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2222 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2223 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2224 unified_systemd_v232 = true;
2225 } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2226 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2227 unified_cache = CGROUP_UNIFIED_NONE;
2228 } else {
2229 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
2230 (unsigned long long) fs.f_type);
2231 unified_cache = CGROUP_UNIFIED_NONE;
2232 }
2233 }
2234 } else if (F_TYPE_EQUAL(fs.f_type, SYSFS_MAGIC)) {
2235 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
2236 "No filesystem is currently mounted on /sys/fs/cgroup.");
2237 } else
2238 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
2239 "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2240 (unsigned long long)fs.f_type);
2241
2242 return unified_cache;
2243 }
2244
2245 int cg_unified_controller(const char *controller) {
2246 int r;
2247
2248 r = cg_unified_cached(false);
2249 if (r < 0)
2250 return r;
2251
2252 if (r == CGROUP_UNIFIED_NONE)
2253 return false;
2254
2255 if (r >= CGROUP_UNIFIED_ALL)
2256 return true;
2257
2258 return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
2259 }
2260
2261 int cg_all_unified(void) {
2262 int r;
2263
2264 r = cg_unified_cached(false);
2265 if (r < 0)
2266 return r;
2267
2268 return r >= CGROUP_UNIFIED_ALL;
2269 }
2270
2271 int cg_hybrid_unified(void) {
2272 int r;
2273
2274 r = cg_unified_cached(false);
2275 if (r < 0)
2276 return r;
2277
2278 return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2279 }
2280
int cg_is_delegated(const char *path) {
        int r;

        assert(path);

        /* The trusted.* xattr is preferred and checked first. */
        r = cg_get_xattr_bool(path, "trusted.delegate");
        if (!ERRNO_IS_NEG_XATTR_ABSENT(r))
                return r;

        /* If the trusted xattr isn't set (preferred), then check the untrusted one. Under the assumption
         * that whoever is trusted enough to own the cgroup, is also trusted enough to decide if it is
         * delegated or not this should be safe. */
        r = cg_get_xattr_bool(path, "user.delegate");
        if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                return false;

        return r;
}
2296
int cg_is_delegated_fd(int fd) {
        int r;

        assert(fd >= 0);

        /* Same as cg_is_delegated(), but operates on an open fd: check the
         * trusted.* xattr first, then fall back to the user.* one. */
        r = getxattr_at_bool(fd, /* path= */ NULL, "trusted.delegate", /* flags= */ 0);
        if (!ERRNO_IS_NEG_XATTR_ABSENT(r))
                return r;

        r = getxattr_at_bool(fd, /* path= */ NULL, "user.delegate", /* flags= */ 0);
        if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                return false;

        return r;
}
2309
int cg_has_coredump_receive(const char *path) {
        int r;

        assert(path);

        /* Whether the "user.coredump_receive" xattr is set on the cgroup;
         * an absent xattr means no. */

        r = cg_get_xattr_bool(path, "user.coredump_receive");
        return ERRNO_IS_NEG_XATTR_ABSENT(r) ? false : r;
}
2321
/* Default IO limits: CGROUP_LIMIT_MAX for every limit type. */
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]    = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WBPS_MAX]    = CGROUP_LIMIT_MAX,
        [CGROUP_IO_RIOPS_MAX]   = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WIOPS_MAX]   = CGROUP_LIMIT_MAX,
};
2328
/* Maps each CGroupIOLimitType to its unit-file setting name. */
static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]    = "IOReadBandwidthMax",
        [CGROUP_IO_WBPS_MAX]    = "IOWriteBandwidthMax",
        [CGROUP_IO_RIOPS_MAX]   = "IOReadIOPSMax",
        [CGROUP_IO_WIOPS_MAX]   = "IOWriteIOPSMax",
};

DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2337
2338 bool is_cgroup_fs(const struct statfs *s) {
2339 return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2340 is_fs_type(s, CGROUP2_SUPER_MAGIC);
2341 }
2342
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        /* Returns true if the fd refers to a cgroup (v1 or v2) file system.
         * Previously this returned -errno on fstatfs() failure, which the bool
         * return type silently converts to 'true' — i.e. a stat failure was
         * misreported as "is a cgroup fs". Treat failure as false instead. */

        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2351
/* Controller names indexed by CGroupController; the bpf-* entries are the
 * systemd-internal pseudo-controllers (cf. cg_mask_supported_subtree()). */
static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
        [CGROUP_CONTROLLER_CPU] = "cpu",
        [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
        [CGROUP_CONTROLLER_CPUSET] = "cpuset",
        [CGROUP_CONTROLLER_IO] = "io",
        [CGROUP_CONTROLLER_BLKIO] = "blkio",
        [CGROUP_CONTROLLER_MEMORY] = "memory",
        [CGROUP_CONTROLLER_DEVICES] = "devices",
        [CGROUP_CONTROLLER_PIDS] = "pids",
        [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
        [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
        [CGROUP_CONTROLLER_BPF_FOREIGN] = "bpf-foreign",
        [CGROUP_CONTROLLER_BPF_SOCKET_BIND] = "bpf-socket-bind",
        [CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES] = "bpf-restrict-network-interfaces",
};

DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
2369
CGroupMask get_cpu_accounting_mask(void) {
        /* (CGroupMask) -1 doubles as the "not yet determined" sentinel */
        static CGroupMask needed_mask = (CGroupMask) -1;

        /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
         * provided externally from the CPU controller, which means we don't
         * need to enable the CPU controller just to get metrics. This is good,
         * because enabling the CPU controller comes at a minor performance
         * hit, especially when it's propagated deep into large hierarchies.
         * There's also no separate CPU accounting controller available within
         * a unified hierarchy.
         *
         * This combination of factors results in the desired cgroup mask to
         * enable for CPU accounting varying as follows:
         *
         *                   ╔═════════════════════╤═════════════════════╗
         *                   ║     Linux ≥4.15     │     Linux <4.15     ║
         *   ╔═══════════════╬═════════════════════╪═════════════════════╣
         *   ║ Unified       ║ nothing             │ CGROUP_MASK_CPU     ║
         *   ╟───────────────╫─────────────────────┼─────────────────────╢
         *   ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
         *   ╚═══════════════╩═════════════════════╧═════════════════════╝
         *
         * We check kernel version here instead of manually checking whether
         * cpu.stat is present for every cgroup, as that check in itself would
         * already be fairly expensive.
         *
         * Kernels where this patch has been backported will therefore have the
         * CPU controller enabled unnecessarily. This is more expensive than
         * necessary, but harmless. ☺️
         */

        if (needed_mask == (CGroupMask) -1) {
                if (cg_all_unified()) {
                        struct utsname u;
                        assert_se(uname(&u) >= 0);

                        if (strverscmp_improved(u.release, "4.15") < 0)
                                needed_mask = CGROUP_MASK_CPU;
                        else
                                needed_mask = 0;
                } else
                        needed_mask = CGROUP_MASK_CPUACCT;
        }

        return needed_mask;
}
2416
2417 bool cpu_accounting_is_cheap(void) {
2418 return get_cpu_accounting_mask() == 0;
2419 }
2420
/* String mappings for ManagedOOMMode, used by the lookup functions generated below. */
static const char* const managed_oom_mode_table[_MANAGED_OOM_MODE_MAX] = {
        [MANAGED_OOM_AUTO] = "auto",
        [MANAGED_OOM_KILL] = "kill",
};

DEFINE_STRING_TABLE_LOOKUP(managed_oom_mode, ManagedOOMMode);

/* String mappings for ManagedOOMPreference. */
static const char* const managed_oom_preference_table[_MANAGED_OOM_PREFERENCE_MAX] = {
        [MANAGED_OOM_PREFERENCE_NONE] = "none",
        [MANAGED_OOM_PREFERENCE_AVOID] = "avoid",
        [MANAGED_OOM_PREFERENCE_OMIT] = "omit",
};

DEFINE_STRING_TABLE_LOOKUP(managed_oom_preference, ManagedOOMPreference);