/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <fcntl.h>
#include <fnmatch.h>

#include "process-util.h"
#include "path-util.h"
#include "special.h"
#include "cgroup-util.h"
#include "cgroup.h"

#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)

void cgroup_context_init(CGroupContext *c) {
        assert(c);

        /* Initialize everything to the kernel defaults, assuming the
         * structure is preinitialized to 0 */

        c->cpu_shares = (unsigned long) -1;
        c->startup_cpu_shares = (unsigned long) -1;
        c->memory_limit = (uint64_t) -1;
        c->blockio_weight = (unsigned long) -1;
        c->startup_blockio_weight = (unsigned long) -1;

        c->cpu_quota_per_sec_usec = USEC_INFINITY;
}

void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
        assert(c);
        assert(a);

        LIST_REMOVE(device_allow, c->device_allow, a);
        free(a->path);
        free(a);
}

void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
        assert(c);
        assert(w);

        LIST_REMOVE(device_weights, c->blockio_device_weights, w);
        free(w->path);
        free(w);
}

void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
        assert(c);
        assert(b);

        LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
        free(b->path);
        free(b);
}

void cgroup_context_done(CGroupContext *c) {
        assert(c);

        while (c->blockio_device_weights)
                cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);

        while (c->blockio_device_bandwidths)
                cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);

        while (c->device_allow)
                cgroup_context_free_device_allow(c, c->device_allow);
}

void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
        CGroupBlockIODeviceBandwidth *b;
        CGroupBlockIODeviceWeight *w;
        CGroupDeviceAllow *a;
        char u[FORMAT_TIMESPAN_MAX];

        assert(c);
        assert(f);

        prefix = strempty(prefix);

        fprintf(f,
                "%sCPUAccounting=%s\n"
                "%sBlockIOAccounting=%s\n"
                "%sMemoryAccounting=%s\n"
                "%sCPUShares=%lu\n"
                "%sStartupCPUShares=%lu\n"
                "%sCPUQuotaPerSecSec=%s\n"
                "%sBlockIOWeight=%lu\n"
                "%sStartupBlockIOWeight=%lu\n"
                "%sMemoryLimit=%" PRIu64 "\n"
                "%sDevicePolicy=%s\n"
                "%sDelegate=%s\n",
                prefix, yes_no(c->cpu_accounting),
                prefix, yes_no(c->blockio_accounting),
                prefix, yes_no(c->memory_accounting),
                prefix, c->cpu_shares,
                prefix, c->startup_cpu_shares,
                prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
                prefix, c->blockio_weight,
                prefix, c->startup_blockio_weight,
                prefix, c->memory_limit,
                prefix, cgroup_device_policy_to_string(c->device_policy),
                prefix, yes_no(c->delegate));

        LIST_FOREACH(device_allow, a, c->device_allow)
                fprintf(f,
                        "%sDeviceAllow=%s %s%s%s\n",
                        prefix,
                        a->path,
                        a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");

        LIST_FOREACH(device_weights, w, c->blockio_device_weights)
                fprintf(f,
                        "%sBlockIODeviceWeight=%s %lu\n",
                        prefix,
                        w->path,
                        w->weight);

        LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
                char buf[FORMAT_BYTES_MAX];

                fprintf(f,
                        "%s%s=%s %s\n",
                        prefix,
                        b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
                        b->path,
                        format_bytes(buf, sizeof(buf), b->bandwidth));
        }
}

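/* Resolves a path to the dev_t of the block device backing it: a
 * block device node is used as-is, while for a regular file we look
 * up the device of the file system it lives on, preferring the whole
 * disk over a partition. */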
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;
        int r;

        assert(p);
        assert(dev);

        r = stat(p, &st);
        if (r < 0)
                return log_warning_errno(errno, "Couldn't stat device %s: %m", p);

        if (S_ISBLK(st.st_mode))
                *dev = st.st_rdev;
        else if (major(st.st_dev) != 0) {
                /* If this is not a device node then find the block
                 * device this file is stored on */
                *dev = st.st_dev;

                /* If this is a partition, try to get the originating
                 * block device */
                block_get_whole_disk(*dev, dev);
        } else {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        return 0;
}

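/* Adds a single device node to the "devices" cgroup whitelist of the
 * given cgroup path, with the requested access string ("r", "w", "m"
 * or a combination thereof). */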
static int whitelist_device(const char *path, const char *node, const char *acc) {
        char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
        struct stat st;
        int r;

        assert(path);
        assert(acc);

        if (stat(node, &st) < 0)
                return log_warning_errno(errno, "Couldn't stat device %s: %m", node);

        if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
                log_warning("%s is not a device.", node);
                return -ENODEV;
        }

        sprintf(buf,
                "%c %u:%u %s",
                S_ISCHR(st.st_mode) ? 'c' : 'b',
                major(st.st_rdev), minor(st.st_rdev),
                acc);

        r = cg_set_attribute("devices", path, "devices.allow", buf);
        if (r < 0)
                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL) ? LOG_DEBUG : LOG_WARNING, r,
                               "Failed to set devices.allow on %s: %m", path);

        return r;
}

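/* Like whitelist_device(), but whitelists all devices of a given
 * major number at once. The major is resolved by matching the driver
 * name (an fnmatch() pattern) against the relevant section of
 * /proc/devices. */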
static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
        _cleanup_fclose_ FILE *f = NULL;
        char line[LINE_MAX];
        bool good = false;
        int r;

        assert(path);
        assert(acc);
        assert(type == 'b' || type == 'c');

        f = fopen("/proc/devices", "re");
        if (!f)
                return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s (%c): %m", name, type);

        FOREACH_LINE(line, f, goto fail) {
                char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
                unsigned maj;

                truncate_nl(line);

                if (type == 'c' && streq(line, "Character devices:")) {
                        good = true;
                        continue;
                }

                if (type == 'b' && streq(line, "Block devices:")) {
                        good = true;
                        continue;
                }

                if (isempty(line)) {
                        good = false;
                        continue;
                }

                if (!good)
                        continue;

                p = strstrip(line);

                w = strpbrk(p, WHITESPACE);
                if (!w)
                        continue;
                *w = 0;

                r = safe_atou(p, &maj);
                if (r < 0)
                        continue;
                if (maj <= 0)
                        continue;

                w++;
                w += strspn(w, WHITESPACE);

                if (fnmatch(name, w, 0) != 0)
                        continue;

                sprintf(buf,
                        "%c %u:* %s",
                        type,
                        maj,
                        acc);

                r = cg_set_attribute("devices", path, "devices.allow", buf);
                if (r < 0)
                        log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to set devices.allow on %s: %m", path);
        }

        return 0;

fail:
        log_warning_errno(errno, "Failed to read /proc/devices: %m");
        return -errno;
}

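/* Writes the settings of a CGroupContext out to the kernel, for all
 * controllers requested in the mask, at the specified cgroup path.
 * Attributes that are not supported on the root cgroup are silently
 * skipped there. */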
void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) {
        bool is_root;
        int r;

        assert(c);
        assert(path);

        if (mask == 0)
                return;

        /* Some cgroup attributes are not supported on the root cgroup,
         * hence silently ignore */
        is_root = isempty(path) || path_equal(path, "/");
        if (is_root)
                /* Make sure we don't try to display messages with an empty path. */
                path = "/";

        /* We generally ignore errors caused by read-only mounted
         * cgroup trees (assuming we are running in a container then),
         * and missing cgroups, i.e. EROFS and ENOENT. */

        if ((mask & CGROUP_MASK_CPU) && !is_root) {
                char buf[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t)) + 1];

                sprintf(buf, "%lu\n",
                        IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != (unsigned long) -1 ? c->startup_cpu_shares :
                        c->cpu_shares != (unsigned long) -1 ? c->cpu_shares : 1024);
                r = cg_set_attribute("cpu", path, "cpu.shares", buf);
                if (r < 0)
                        log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to set cpu.shares on %s: %m", path);

                sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
                r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
                if (r < 0)
                        log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to set cpu.cfs_period_us on %s: %m", path);

                if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
                        sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
                        r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
                } else
                        r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
                if (r < 0)
                        log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to set cpu.cfs_quota_us on %s: %m", path);
        }

        if (mask & CGROUP_MASK_BLKIO) {
                char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
                              DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
                              DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
                CGroupBlockIODeviceWeight *w;
                CGroupBlockIODeviceBandwidth *b;

                if (!is_root) {
                        sprintf(buf, "%lu\n", IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_blockio_weight != (unsigned long) -1 ? c->startup_blockio_weight :
                                c->blockio_weight != (unsigned long) -1 ? c->blockio_weight : 1000);
                        r = cg_set_attribute("blkio", path, "blkio.weight", buf);
                        if (r < 0)
                                log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
                                               "Failed to set blkio.weight on %s: %m", path);

                        /* FIXME: no way to reset this list */
                        LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
                                dev_t dev;

                                r = lookup_blkio_device(w->path, &dev);
                                if (r < 0)
                                        continue;

                                sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
                                r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
                                if (r < 0)
                                        log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
                                                       "Failed to set blkio.weight_device on %s: %m", path);
                        }
                }

                /* FIXME: no way to reset this list */
                LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
                        const char *a;
                        dev_t dev;

                        r = lookup_blkio_device(b->path, &dev);
                        if (r < 0)
                                continue;

                        a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";

                        sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
                        r = cg_set_attribute("blkio", path, a, buf);
                        if (r < 0)
                                log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
                                               "Failed to set %s on %s: %m", a, path);
                }
        }

        if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
                if (c->memory_limit != (uint64_t) -1) {
                        char buf[DECIMAL_STR_MAX(uint64_t) + 1];

                        sprintf(buf, "%" PRIu64 "\n", c->memory_limit);

                        if (cg_unified() <= 0)
                                r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
                        else
                                r = cg_set_attribute("memory", path, "memory.max", buf);

                } else {
                        if (cg_unified() <= 0)
                                r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
                        else
                                r = cg_set_attribute("memory", path, "memory.max", "max");
                }

                if (r < 0)
                        log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to set memory.limit_in_bytes/memory.max on %s: %m", path);
        }

        if ((mask & CGROUP_MASK_DEVICE) && !is_root) {
                CGroupDeviceAllow *a;

                /* Changing the devices list of a populated cgroup
                 * might result in EINVAL, hence ignore EINVAL
                 * here. */

                if (c->device_allow || c->device_policy != CGROUP_AUTO)
                        r = cg_set_attribute("devices", path, "devices.deny", "a");
                else
                        r = cg_set_attribute("devices", path, "devices.allow", "a");
                if (r < 0)
                        log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to reset devices.list on %s: %m", path);

                if (c->device_policy == CGROUP_CLOSED ||
                    (c->device_policy == CGROUP_AUTO && c->device_allow)) {
                        static const char auto_devices[] =
                                "/dev/null\0" "rwm\0"
                                "/dev/zero\0" "rwm\0"
                                "/dev/full\0" "rwm\0"
                                "/dev/random\0" "rwm\0"
                                "/dev/urandom\0" "rwm\0"
                                "/dev/tty\0" "rwm\0"
                                "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */

                        const char *x, *y;

                        NULSTR_FOREACH_PAIR(x, y, auto_devices)
                                whitelist_device(path, x, y);

                        whitelist_major(path, "pts", 'c', "rw");
                        whitelist_major(path, "kdbus", 'c', "rw");
                        whitelist_major(path, "kdbus/*", 'c', "rw");
                }

                LIST_FOREACH(device_allow, a, c->device_allow) {
                        char acc[4];
                        unsigned k = 0;

                        if (a->r)
                                acc[k++] = 'r';
                        if (a->w)
                                acc[k++] = 'w';
                        if (a->m)
                                acc[k++] = 'm';

                        if (k == 0)
                                continue;

                        acc[k++] = 0;

                        if (startswith(a->path, "/dev/"))
                                whitelist_device(path, a->path, acc);
                        else if (startswith(a->path, "block-"))
                                whitelist_major(path, a->path + 6, 'b', acc);
                        else if (startswith(a->path, "char-"))
                                whitelist_major(path, a->path + 5, 'c', acc);
                        else
                                log_debug("Ignoring device %s while writing cgroup attribute.", a->path);
                }
        }
}

CGroupMask cgroup_context_get_mask(CGroupContext *c) {
        CGroupMask mask = 0;

        /* Figure out which controllers we need */

        if (c->cpu_accounting ||
            c->cpu_shares != (unsigned long) -1 ||
            c->startup_cpu_shares != (unsigned long) -1 ||
            c->cpu_quota_per_sec_usec != USEC_INFINITY)
                mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU;

        if (c->blockio_accounting ||
            c->blockio_weight != (unsigned long) -1 ||
            c->startup_blockio_weight != (unsigned long) -1 ||
            c->blockio_device_weights ||
            c->blockio_device_bandwidths)
                mask |= CGROUP_MASK_BLKIO;

        if (c->memory_accounting ||
            c->memory_limit != (uint64_t) -1)
                mask |= CGROUP_MASK_MEMORY;

        if (c->device_allow ||
            c->device_policy != CGROUP_AUTO)
                mask |= CGROUP_MASK_DEVICE;

        return mask;
}

CGroupMask unit_get_own_mask(Unit *u) {
        CGroupContext *c;

        /* Returns the mask of controllers the unit needs for itself */

        c = unit_get_cgroup_context(u);
        if (!c)
                return 0;

        /* If delegation is turned on, then turn on all cgroups,
         * unless the process we fork into it is known to drop
         * privileges anyway, and shouldn't get access to the
         * controllers anyway. */

        if (c->delegate) {
                ExecContext *e;

                e = unit_get_exec_context(u);
                if (!e || exec_context_maintains_privileges(e))
                        return _CGROUP_MASK_ALL;
        }

        return cgroup_context_get_mask(c);
}

CGroupMask unit_get_members_mask(Unit *u) {
        assert(u);

        /* Returns the mask of controllers all of the unit's children
         * require, merged */

        if (u->cgroup_members_mask_valid)
                return u->cgroup_members_mask;

        u->cgroup_members_mask = 0;

        if (u->type == UNIT_SLICE) {
                Unit *member;
                Iterator i;

                SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {

                        if (member == u)
                                continue;

                        if (UNIT_DEREF(member->slice) != u)
                                continue;

                        u->cgroup_members_mask |=
                                unit_get_own_mask(member) |
                                unit_get_members_mask(member);
                }
        }

        u->cgroup_members_mask_valid = true;
        return u->cgroup_members_mask;
}

CGroupMask unit_get_siblings_mask(Unit *u) {
        assert(u);

        /* Returns the mask of controllers all of the unit's siblings
         * require, i.e. the members mask of the unit's parent slice
         * if there is one. */

        if (UNIT_ISSET(u->slice))
                return unit_get_members_mask(UNIT_DEREF(u->slice));

        return unit_get_own_mask(u) | unit_get_members_mask(u);
}

CGroupMask unit_get_subtree_mask(Unit *u) {

        /* Returns the mask of this subtree, meaning of the group
         * itself and its children. */

        return unit_get_own_mask(u) | unit_get_members_mask(u);
}

CGroupMask unit_get_target_mask(Unit *u) {
        CGroupMask mask;

        /* This returns the cgroup mask of all controllers to enable
         * for a specific cgroup, i.e. everything it needs itself,
         * plus all that its children need, plus all that its siblings
         * need. This is primarily useful on the legacy cgroup
         * hierarchy, where we need to duplicate each cgroup in each
         * hierarchy that shall be enabled for it. */

        mask = unit_get_own_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
        mask &= u->manager->cgroup_supported;

        return mask;
}

CGroupMask unit_get_enable_mask(Unit *u) {
        CGroupMask mask;

        /* This returns the cgroup mask of all controllers to enable
         * for the children of a specific cgroup. This is primarily
         * useful for the unified cgroup hierarchy, where each cgroup
         * controls which controllers are enabled for its children. */

        mask = unit_get_members_mask(u);
        mask &= u->manager->cgroup_supported;

        return mask;
}

/* Recurse from a unit up through its containing slices, propagating
 * mask bits upward. A unit is also member of itself. */
void unit_update_cgroup_members_masks(Unit *u) {
        CGroupMask m;
        bool more;

        assert(u);

        /* Calculate subtree mask */
        m = unit_get_subtree_mask(u);

        /* See if anything changed from the previous invocation. If
         * not, we're done. */
        if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask)
                return;

        more =
                u->cgroup_subtree_mask_valid &&
                ((m & ~u->cgroup_subtree_mask) != 0) &&
                ((~m & u->cgroup_subtree_mask) == 0);

        u->cgroup_subtree_mask = m;
        u->cgroup_subtree_mask_valid = true;

        if (UNIT_ISSET(u->slice)) {
                Unit *s = UNIT_DEREF(u->slice);

                if (more)
                        /* There's more set now than before. We
                         * propagate the new mask to the parent's mask
                         * (not caring if it actually was valid or
                         * not). */

                        s->cgroup_members_mask |= m;

                else
                        /* There's less set now than before (or we
                         * don't know), we need to recalculate
                         * everything, so let's invalidate the
                         * parent's members mask */

                        s->cgroup_members_mask_valid = false;

                /* And now make sure that this change also hits our
                 * grandparents */
                unit_update_cgroup_members_masks(s);
        }
}

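/* Callback for cg_migrate_everywhere(): picks the cgroup path that
 * processes should be moved to, walking up the slice hierarchy until
 * a unit is found that has been realized for all controllers in the
 * mask. */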
static const char *migrate_callback(CGroupMask mask, void *userdata) {
        Unit *u = userdata;

        assert(mask != 0);
        assert(u);

        while (u) {
                if (u->cgroup_path &&
                    u->cgroup_realized &&
                    (u->cgroup_realized_mask & mask) == mask)
                        return u->cgroup_path;

                u = UNIT_DEREF(u->slice);
        }

        return NULL;
}

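/* Computes the default cgroup path for a unit: the manager's cgroup
 * root, followed by the escaped slice path (if any), followed by the
 * escaped unit name. Returns NULL on OOM. */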
char *unit_default_cgroup_path(Unit *u) {
        _cleanup_free_ char *escaped = NULL, *slice = NULL;
        int r;

        assert(u);

        if (unit_has_name(u, SPECIAL_ROOT_SLICE))
                return strdup(u->manager->cgroup_root);

        if (UNIT_ISSET(u->slice) && !unit_has_name(UNIT_DEREF(u->slice), SPECIAL_ROOT_SLICE)) {
                r = cg_slice_to_path(UNIT_DEREF(u->slice)->id, &slice);
                if (r < 0)
                        return NULL;
        }

        escaped = cg_escape(u->id);
        if (!escaped)
                return NULL;

        if (slice)
                return strjoin(u->manager->cgroup_root, "/", slice, "/", escaped, NULL);
        else
                return strjoin(u->manager->cgroup_root, "/", escaped, NULL);
}

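/* Sets (or clears) the cgroup path of a unit and registers it in the
 * manager's cgroup hashmap. Returns 1 if the path was changed, 0 if
 * it was already set to the same value, and a negative errno on
 * failure (e.g. -EEXIST if another unit already owns the path). */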
int unit_set_cgroup_path(Unit *u, const char *path) {
        _cleanup_free_ char *p = NULL;
        int r;

        assert(u);

        if (path) {
                p = strdup(path);
                if (!p)
                        return -ENOMEM;
        } else
                p = NULL;

        if (streq_ptr(u->cgroup_path, p))
                return 0;

        if (p) {
                r = hashmap_put(u->manager->cgroup_unit, p, u);
                if (r < 0)
                        return r;
        }

        unit_release_cgroup(u);

        u->cgroup_path = p;
        p = NULL;

        return 1;
}

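/* Installs an inotify watch on the unit's "cgroup.populated" file, so
 * that we get notified when the cgroup runs empty. Only used on the
 * unified hierarchy; on the legacy hierarchy we rely on the release
 * agent instead. */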
int unit_watch_cgroup(Unit *u) {
        _cleanup_free_ char *populated = NULL;
        int r;

        assert(u);

        if (!u->cgroup_path)
                return 0;

        if (u->cgroup_inotify_wd >= 0)
                return 0;

        /* Only applies to the unified hierarchy */
        r = cg_unified();
        if (r < 0)
746 return log_unit_error_errno(u, r, "Failed detect wether the unified hierarchy is used: %m");
        if (r == 0)
                return 0;

        /* Don't watch the root slice, it's pointless. */
        if (unit_has_name(u, SPECIAL_ROOT_SLICE))
                return 0;

        r = hashmap_ensure_allocated(&u->manager->cgroup_inotify_wd_unit, &trivial_hash_ops);
        if (r < 0)
                return log_oom();

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.populated", &populated);
        if (r < 0)
                return log_oom();

        u->cgroup_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, populated, IN_MODIFY);
        if (u->cgroup_inotify_wd < 0) {

                if (errno == ENOENT) /* If the directory is already
                                      * gone we don't need to track
                                      * it, so this is not an error */
                        return 0;

                return log_unit_error_errno(u, errno, "Failed to add inotify watch descriptor for control group %s: %m", u->cgroup_path);
        }

        r = hashmap_put(u->manager->cgroup_inotify_wd_unit, INT_TO_PTR(u->cgroup_inotify_wd), u);
        if (r < 0)
                return log_unit_error_errno(u, r, "Failed to add inotify watch descriptor to hash map: %m");

        return 0;
}

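/* Creates the unit's cgroup in all hierarchies given in target_mask,
 * enables the requested controllers for its children and, unless the
 * unit is a slice or delegates, migrates stray processes into it. */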
static int unit_create_cgroup(
                Unit *u,
                CGroupMask target_mask,
                CGroupMask enable_mask) {

        CGroupContext *c;
        int r;

        assert(u);

        c = unit_get_cgroup_context(u);
        if (!c)
                return 0;

        if (!u->cgroup_path) {
                _cleanup_free_ char *path = NULL;

                path = unit_default_cgroup_path(u);
                if (!path)
                        return log_oom();

                r = unit_set_cgroup_path(u, path);
                if (r == -EEXIST)
                        return log_unit_error_errno(u, r, "Control group %s exists already.", path);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Failed to set unit's control group path to %s: %m", path);
        }

        /* First, create our own group */
        r = cg_create_everywhere(u->manager->cgroup_supported, target_mask, u->cgroup_path);
        if (r < 0)
                return log_unit_error_errno(u, r, "Failed to create cgroup %s: %m", u->cgroup_path);

        /* Start watching it */
        (void) unit_watch_cgroup(u);

        /* Enable all controllers we need */
        r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path);
        if (r < 0)
                log_unit_warning_errno(u, r, "Failed to enable controllers on cgroup %s, ignoring: %m", u->cgroup_path);

        /* Keep track that this is now realized */
        u->cgroup_realized = true;
        u->cgroup_realized_mask = target_mask;

        if (u->type != UNIT_SLICE && !c->delegate) {

                /* Then, possibly move things over, but not if
                 * subgroups may contain processes, which is the case
                 * for slice and delegation units. */
                r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);
                if (r < 0)
832 log_unit_warning_errno(u, r, "Failed to migrate cgroup from to %s, ignoring: %m", u->cgroup_path);
        }

        return 0;
}

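/* Realizes the unit's cgroup if necessary and attaches all PIDs
 * tracked for the unit to it, in every hierarchy. */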
int unit_attach_pids_to_cgroup(Unit *u) {
        int r;

        assert(u);

        r = unit_realize_cgroup(u);
        if (r < 0)
                return r;

        r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->pids, migrate_callback, u);
        if (r < 0)
                return r;

        return 0;
}

static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask) {
        assert(u);

        return u->cgroup_realized && u->cgroup_realized_mask == target_mask;
}

/* Check if necessary controllers and attributes for a unit are in place.
 *
 * If so, do nothing.
 * If not, create paths, move processes over, and set attributes.
 *
 * Returns 0 on success and < 0 on failure. */
static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
        CGroupMask target_mask, enable_mask;
        int r;

        assert(u);

        if (u->in_cgroup_queue) {
                LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u);
                u->in_cgroup_queue = false;
        }

        target_mask = unit_get_target_mask(u);
        if (unit_has_mask_realized(u, target_mask))
                return 0;

        /* First, realize parents */
        if (UNIT_ISSET(u->slice)) {
                r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
                if (r < 0)
                        return r;
        }

        /* And then do the real work */
        enable_mask = unit_get_enable_mask(u);
        r = unit_create_cgroup(u, target_mask, enable_mask);
        if (r < 0)
                return r;

        /* Finally, apply the necessary attributes. */
        cgroup_context_apply(unit_get_cgroup_context(u), target_mask, u->cgroup_path, state);

        return 0;
}

static void unit_add_to_cgroup_queue(Unit *u) {

        if (u->in_cgroup_queue)
                return;

        LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u);
        u->in_cgroup_queue = true;
}

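/* Realizes the cgroups of all units queued for it and returns how
 * many units were processed. Realization failures are logged and
 * otherwise ignored. */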
unsigned manager_dispatch_cgroup_queue(Manager *m) {
        ManagerState state;
        unsigned n = 0;
        Unit *i;
        int r;

        state = manager_state(m);

        while ((i = m->cgroup_queue)) {
                assert(i->in_cgroup_queue);

                r = unit_realize_cgroup_now(i, state);
                if (r < 0)
                        log_warning_errno(r, "Failed to realize cgroups for queued unit %s, ignoring: %m", i->id);

                n++;
        }

        return n;
}

static void unit_queue_siblings(Unit *u) {
        Unit *slice;

        /* This adds the siblings of the specified unit and the
         * siblings of all parent units to the cgroup queue. (But
         * neither the specified unit itself nor the parents.) */

        while ((slice = UNIT_DEREF(u->slice))) {
                Iterator i;
                Unit *m;

                SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
                        if (m == u)
                                continue;

                        /* Skip units that have a dependency on the slice
                         * but aren't actually in it. */
                        if (UNIT_DEREF(m->slice) != slice)
                                continue;

                        /* No point in doing cgroup application for units
                         * without active processes. */
                        if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
                                continue;

                        /* If the unit doesn't need any new controllers
                         * and has current ones realized, it doesn't need
                         * any changes. */
                        if (unit_has_mask_realized(m, unit_get_target_mask(m)))
                                continue;

                        unit_add_to_cgroup_queue(m);
                }

                u = slice;
        }
}

int unit_realize_cgroup(Unit *u) {
        assert(u);

        if (!UNIT_HAS_CGROUP_CONTEXT(u))
                return 0;

        /* So, here's the deal: when realizing the cgroups for this
         * unit, we need to first create all parents, but there's more
         * actually: for the weight-based controllers we also need to
         * make sure that all our siblings (i.e. units that are in the
         * same slice as we are) have cgroups, too. Otherwise, things
         * would become very uneven as each of their processes would
         * get as much resources as all our group together. This call
         * will synchronously create the parent cgroups, but will
         * defer work on the siblings to the next event loop
         * iteration. */

        /* Add all sibling slices to the cgroup queue. */
        unit_queue_siblings(u);

        /* And realize this one now (and apply the values) */
        return unit_realize_cgroup_now(u, manager_state(u->manager));
}

void unit_release_cgroup(Unit *u) {
        assert(u);

        /* Forgets all cgroup details for this cgroup */

        if (u->cgroup_path) {
                (void) hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
                u->cgroup_path = mfree(u->cgroup_path);
        }

        if (u->cgroup_inotify_wd >= 0) {
                if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_inotify_wd) < 0)
                        log_unit_debug_errno(u, errno, "Failed to remove cgroup inotify watch %i for %s, ignoring: %m", u->cgroup_inotify_wd, u->id);

                (void) hashmap_remove(u->manager->cgroup_inotify_wd_unit, INT_TO_PTR(u->cgroup_inotify_wd));
                u->cgroup_inotify_wd = -1;
        }
}

void unit_prune_cgroup(Unit *u) {
        int r;
        bool is_root_slice;

        assert(u);

        /* Removes the cgroup, if empty and possible, and stops watching it. */

        if (!u->cgroup_path)
                return;

        is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);

        r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
        if (r < 0) {
                log_debug_errno(r, "Failed to destroy cgroup %s, ignoring: %m", u->cgroup_path);
                return;
        }

        if (is_root_slice)
                return;

        unit_release_cgroup(u);

        u->cgroup_realized = false;
        u->cgroup_realized_mask = 0;
}

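/* Tries to determine the main PID of a unit by scanning its cgroup
 * for processes that are direct children of the manager. Fails with
 * -ENODATA if more than one such process is found. */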
int unit_search_main_pid(Unit *u, pid_t *ret) {
        _cleanup_fclose_ FILE *f = NULL;
        pid_t pid = 0, npid, mypid;
        int r;

        assert(u);
        assert(ret);

        if (!u->cgroup_path)
                return -ENXIO;

        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f);
        if (r < 0)
                return r;

        mypid = getpid();
        while (cg_read_pid(f, &npid) > 0) {
                pid_t ppid;

                if (npid == pid)
                        continue;

                /* Ignore processes that aren't our kids */
                if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
                        continue;

                if (pid != 0)
                        /* Dang, there's more than one daemonized PID
                         * in this group, so we don't know what
                         * process is the main process. */
                        return -ENODATA;

                pid = npid;
        }

        *ret = pid;
        return 0;
}

static int unit_watch_pids_in_path(Unit *u, const char *path) {
        _cleanup_closedir_ DIR *d = NULL;
        _cleanup_fclose_ FILE *f = NULL;
        int ret = 0, r;

        assert(u);
        assert(path);

        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f);
        if (r < 0)
                ret = r;
        else {
                pid_t pid;

                while ((r = cg_read_pid(f, &pid)) > 0) {
                        r = unit_watch_pid(u, pid);
                        if (r < 0 && ret >= 0)
                                ret = r;
                }

                if (r < 0 && ret >= 0)
                        ret = r;
        }

        r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
        if (r < 0) {
                if (ret >= 0)
                        ret = r;
        } else {
                char *fn;

                while ((r = cg_read_subgroup(d, &fn)) > 0) {
                        _cleanup_free_ char *p = NULL;

                        p = strjoin(path, "/", fn, NULL);
                        free(fn);

                        if (!p)
                                return -ENOMEM;

                        r = unit_watch_pids_in_path(u, p);
                        if (r < 0 && ret >= 0)
                                ret = r;
                }

                if (r < 0 && ret >= 0)
                        ret = r;
        }

        return ret;
}

int unit_watch_all_pids(Unit *u) {
        assert(u);

        /* Adds all PIDs from our cgroup to the set of PIDs we
         * watch. This is a fallback logic for cases where we do not
         * get reliable cgroup empty notifications: we try to use
         * SIGCHLD as replacement. */

        if (!u->cgroup_path)
                return -ENOENT;

        if (cg_unified() > 0) /* On unified we can use proper notifications */
                return 0;

        return unit_watch_pids_in_path(u, u->cgroup_path);
}

int unit_notify_cgroup_empty(Unit *u) {
        int r;

        assert(u);

        if (!u->cgroup_path)
                return 0;

        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
        if (r <= 0)
                return r;

        unit_add_to_gc_queue(u);

        if (UNIT_VTABLE(u)->notify_cgroup_empty)
                UNIT_VTABLE(u)->notify_cgroup_empty(u);

        return 0;
}

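/* sd-event callback for the cgroup inotify fd: drains all queued
 * inotify events, maps each watch descriptor back to its unit and
 * checks whether that unit's cgroup is now empty. */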
static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        Manager *m = userdata;

        assert(s);
        assert(fd >= 0);
        assert(m);

        for (;;) {
                union inotify_event_buffer buffer;
                struct inotify_event *e;
                ssize_t l;

                l = read(fd, &buffer, sizeof(buffer));
                if (l < 0) {
                        if (errno == EINTR || errno == EAGAIN)
                                return 0;

                        return log_error_errno(errno, "Failed to read control group inotify events: %m");
                }

                FOREACH_INOTIFY_EVENT(e, buffer, l) {
                        Unit *u;

                        if (e->wd < 0)
                                /* Queue overflow has no watch descriptor */
                                continue;

                        if (e->mask & IN_IGNORED)
                                /* The watch was just removed */
                                continue;

                        u = hashmap_get(m->cgroup_inotify_wd_unit, INT_TO_PTR(e->wd));
                        if (!u) /* Note that inotify might deliver
                                 * events for a watch even after it
                                 * was removed, because it was queued
                                 * before the removal. Let's ignore
                                 * this here safely. */
                                continue;

                        (void) unit_notify_cgroup_empty(u);
                }
        }
}

int manager_setup_cgroup(Manager *m) {
        _cleanup_free_ char *path = NULL;
        CGroupController c;
        int r, unified;
        char *e;

        assert(m);

        /* 1. Determine hierarchy */
        m->cgroup_root = mfree(m->cgroup_root);
        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
        if (r < 0)
                return log_error_errno(r, "Cannot determine cgroup we are running in: %m");

        /* Chop off the init scope, if we are already located in it */
        e = endswith(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);

        /* LEGACY: Also chop off the system slice if we are in
         * it. This is to support live upgrades from older systemd
         * versions where PID 1 was moved there. Also see
         * cg_get_root_path(). */
        if (!e && m->running_as == MANAGER_SYSTEM) {
                e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
                if (!e)
                        e = endswith(m->cgroup_root, "/system"); /* even more legacy */
        }
        if (e)
                *e = 0;

        /* And make sure to store away the root value without trailing
         * slash, even for the root dir, so that we can easily prepend
         * it everywhere. */
        while ((e = endswith(m->cgroup_root, "/")))
                *e = 0;

        /* 2. Show data */
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
        if (r < 0)
                return log_error_errno(r, "Cannot find cgroup mount point: %m");

        unified = cg_unified();
        if (unified < 0)
                return log_error_errno(unified, "Couldn't determine if we are running in the unified hierarchy: %m");
        if (unified > 0)
                log_debug("Unified cgroup hierarchy is located at %s.", path);
        else
                log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);

        if (!m->test_run) {
                const char *scope_path;

                /* 3. Install agent */
                if (unified) {

                        /* In the unified hierarchy we can get
                         * cgroup empty notifications via inotify. */

                        m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
                        m->cgroup_inotify_fd = safe_close(m->cgroup_inotify_fd);

                        m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
                        if (m->cgroup_inotify_fd < 0)
                                return log_error_errno(errno, "Failed to create control group inotify object: %m");

                        r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
                        if (r < 0)
                                return log_error_errno(r, "Failed to watch control group inotify object: %m");

                        r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_IDLE - 5);
                        if (r < 0)
                                return log_error_errno(r, "Failed to set priority of inotify event source: %m");

                        (void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");

                } else if (m->running_as == MANAGER_SYSTEM) {

                        /* On the legacy hierarchy we only get
                         * notifications via cgroup agents. (Which
                         * isn't really reliable, since it does not
                         * generate events when control groups with
                         * children run empty.) */

                        r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
                        if (r < 0)
                                log_warning_errno(r, "Failed to install release agent, ignoring: %m");
                        else if (r > 0)
                                log_debug("Installed release agent.");
                        else if (r == 0)
                                log_debug("Release agent already installed.");
                }

                /* 4. Make sure we are in the special "init.scope" unit in the root slice. */
                scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
                r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
                if (r < 0)
                        return log_error_errno(r, "Failed to create %s control group: %m", scope_path);

                /* also, move all other userspace processes remaining
                 * in the root cgroup into that scope. */
                r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, false);
                if (r < 0)
                        log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");

                /* 5. And pin it, so that it cannot be unmounted */
                safe_close(m->pin_cgroupfs_fd);
                m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
                if (m->pin_cgroupfs_fd < 0)
                        return log_error_errno(errno, "Failed to open pin file: %m");

                /* 6. Always enable hierarchical support if it exists... */
                if (!unified)
                        (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
        }

        /* 7. Figure out which controllers are supported */
        r = cg_mask_supported(&m->cgroup_supported);
        if (r < 0)
                return log_error_errno(r, "Failed to determine supported controllers: %m");

1330 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++)
1331 log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & c));
1332
        return 0;
}

void manager_shutdown_cgroup(Manager *m, bool delete) {
        assert(m);

        /* We can't really delete the group, since we are in it. But
         * let's trim it. */
        if (delete && m->cgroup_root)
                (void) cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);

        m->cgroup_inotify_wd_unit = hashmap_free(m->cgroup_inotify_wd_unit);

        m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
        m->cgroup_inotify_fd = safe_close(m->cgroup_inotify_fd);

        m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd);

        m->cgroup_root = mfree(m->cgroup_root);
}

Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
        char *p;
        Unit *u;

        assert(m);
        assert(cgroup);

        u = hashmap_get(m->cgroup_unit, cgroup);
        if (u)
                return u;

        p = strdupa(cgroup);
        for (;;) {
                char *e;

                e = strrchr(p, '/');
                if (!e || e == p)
                        return hashmap_get(m->cgroup_unit, SPECIAL_ROOT_SLICE);

                *e = 0;

                u = hashmap_get(m->cgroup_unit, p);
                if (u)
                        return u;
        }
}

Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
        _cleanup_free_ char *cgroup = NULL;
        Unit *u;
        int r;

        assert(m);

        if (pid <= 0)
                return NULL;

        if (pid == 1)
                return hashmap_get(m->units, SPECIAL_INIT_SCOPE);

        u = hashmap_get(m->watch_pids1, LONG_TO_PTR(pid));
        if (u)
                return u;

        u = hashmap_get(m->watch_pids2, LONG_TO_PTR(pid));
        if (u)
                return u;

        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
        if (r < 0)
                return NULL;

        return manager_get_unit_by_cgroup(m, cgroup);
}

int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
        Unit *u;

        assert(m);
        assert(cgroup);

        u = manager_get_unit_by_cgroup(m, cgroup);
        if (!u)
                return 0;

        return unit_notify_cgroup_empty(u);
}

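/* Queries the current memory usage of the unit's cgroup, reading
 * "memory.current" on the unified hierarchy and
 * "memory.usage_in_bytes" on the legacy one. Returns -ENODATA if no
 * cgroup is realized for the memory controller. */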
int unit_get_memory_current(Unit *u, uint64_t *ret) {
        _cleanup_free_ char *v = NULL;
        int r;

        assert(u);
        assert(ret);

        if (!u->cgroup_path)
                return -ENODATA;

        if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
                return -ENODATA;

        if (cg_unified() <= 0)
                r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v);
        else
                r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v);
        if (r == -ENOENT)
                return -ENODATA;
        if (r < 0)
                return r;

        return safe_atou64(v, ret);
}

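/* Reads the raw, cumulative CPU usage counter of the unit's cgroup
 * in nanoseconds, from "cpuacct.usage". unit_get_cpu_usage() below
 * additionally subtracts the base value recorded by the last
 * unit_reset_cpu_usage(). */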
static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
        _cleanup_free_ char *v = NULL;
        uint64_t ns;
        int r;

        assert(u);
        assert(ret);

        if (!u->cgroup_path)
                return -ENODATA;

        if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
                return -ENODATA;

        r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
        if (r == -ENOENT)
                return -ENODATA;
        if (r < 0)
                return r;

        r = safe_atou64(v, &ns);
        if (r < 0)
                return r;

        *ret = ns;
        return 0;
}

int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
        nsec_t ns;
        int r;

        r = unit_get_cpu_usage_raw(u, &ns);
        if (r < 0)
                return r;

        if (ns > u->cpuacct_usage_base)
                ns -= u->cpuacct_usage_base;
        else
                ns = 0;

        *ret = ns;
        return 0;
}

int unit_reset_cpu_usage(Unit *u) {
        nsec_t ns;
        int r;

        assert(u);

        r = unit_get_cpu_usage_raw(u, &ns);
        if (r < 0) {
                u->cpuacct_usage_base = 0;
                return r;
        }

        u->cpuacct_usage_base = ns;
        return 0;
}

bool unit_cgroup_delegate(Unit *u) {
        CGroupContext *c;

        assert(u);

        c = unit_get_cgroup_context(u);
        if (!c)
                return false;

        return c->delegate;
}

static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
        [CGROUP_AUTO] = "auto",
        [CGROUP_CLOSED] = "closed",
        [CGROUP_STRICT] = "strict",
};

DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);