/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <errno.h>
#include <fcntl.h>
#include <fnmatch.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "cgroup-util.h"
#include "path-util.h"
#include "process-util.h"

#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
33 void cgroup_context_init(CGroupContext
*c
) {
36 /* Initialize everything to the kernel defaults, assuming the
37 * structure is preinitialized to 0 */
39 c
->cpu_shares
= (unsigned long) -1;
40 c
->startup_cpu_shares
= (unsigned long) -1;
41 c
->memory_limit
= (uint64_t) -1;
42 c
->blockio_weight
= (unsigned long) -1;
43 c
->startup_blockio_weight
= (unsigned long) -1;
45 c
->cpu_quota_per_sec_usec
= USEC_INFINITY
;
48 void cgroup_context_free_device_allow(CGroupContext
*c
, CGroupDeviceAllow
*a
) {
52 LIST_REMOVE(device_allow
, c
->device_allow
, a
);
57 void cgroup_context_free_blockio_device_weight(CGroupContext
*c
, CGroupBlockIODeviceWeight
*w
) {
61 LIST_REMOVE(device_weights
, c
->blockio_device_weights
, w
);
66 void cgroup_context_free_blockio_device_bandwidth(CGroupContext
*c
, CGroupBlockIODeviceBandwidth
*b
) {
70 LIST_REMOVE(device_bandwidths
, c
->blockio_device_bandwidths
, b
);
75 void cgroup_context_done(CGroupContext
*c
) {
78 while (c
->blockio_device_weights
)
79 cgroup_context_free_blockio_device_weight(c
, c
->blockio_device_weights
);
81 while (c
->blockio_device_bandwidths
)
82 cgroup_context_free_blockio_device_bandwidth(c
, c
->blockio_device_bandwidths
);
84 while (c
->device_allow
)
85 cgroup_context_free_device_allow(c
, c
->device_allow
);
88 void cgroup_context_dump(CGroupContext
*c
, FILE* f
, const char *prefix
) {
89 CGroupBlockIODeviceBandwidth
*b
;
90 CGroupBlockIODeviceWeight
*w
;
92 char u
[FORMAT_TIMESPAN_MAX
];
97 prefix
= strempty(prefix
);
100 "%sCPUAccounting=%s\n"
101 "%sBlockIOAccounting=%s\n"
102 "%sMemoryAccounting=%s\n"
104 "%sStartupCPUShares=%lu\n"
105 "%sCPUQuotaPerSecSec=%s\n"
106 "%sBlockIOWeight=%lu\n"
107 "%sStartupBlockIOWeight=%lu\n"
108 "%sMemoryLimit=%" PRIu64
"\n"
109 "%sDevicePolicy=%s\n"
111 prefix
, yes_no(c
->cpu_accounting
),
112 prefix
, yes_no(c
->blockio_accounting
),
113 prefix
, yes_no(c
->memory_accounting
),
114 prefix
, c
->cpu_shares
,
115 prefix
, c
->startup_cpu_shares
,
116 prefix
, format_timespan(u
, sizeof(u
), c
->cpu_quota_per_sec_usec
, 1),
117 prefix
, c
->blockio_weight
,
118 prefix
, c
->startup_blockio_weight
,
119 prefix
, c
->memory_limit
,
120 prefix
, cgroup_device_policy_to_string(c
->device_policy
),
121 prefix
, yes_no(c
->delegate
));
123 LIST_FOREACH(device_allow
, a
, c
->device_allow
)
125 "%sDeviceAllow=%s %s%s%s\n",
128 a
->r
? "r" : "", a
->w
? "w" : "", a
->m
? "m" : "");
130 LIST_FOREACH(device_weights
, w
, c
->blockio_device_weights
)
132 "%sBlockIODeviceWeight=%s %lu",
137 LIST_FOREACH(device_bandwidths
, b
, c
->blockio_device_bandwidths
) {
138 char buf
[FORMAT_BYTES_MAX
];
143 b
->read
? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
145 format_bytes(buf
, sizeof(buf
), b
->bandwidth
));
/* Resolve path 'p' to the block device (major:minor in *dev) that blkio
 * attributes should be applied to: the node itself if 'p' is a block
 * device node, otherwise the device backing the file system 'p' lives on.
 * Returns 0 on success, negative errno-style error on failure. */
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;
        int r;

        assert(p);
        assert(dev);

        r = stat(p, &st);
        if (r < 0)
                return log_warning_errno(errno, "Couldn't stat device %s: %m", p);

        if (S_ISBLK(st.st_mode))
                *dev = st.st_rdev;
        else if (major(st.st_dev) != 0) {
                /* If this is not a device node then find the block
                 * device this file is stored on */
                *dev = st.st_dev;

                /* If this is a partition, try to get the originating
                 * block device */
                block_get_whole_disk(*dev, dev);
        } else {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        return 0;
}
178 static int whitelist_device(const char *path
, const char *node
, const char *acc
) {
179 char buf
[2+DECIMAL_STR_MAX(dev_t
)*2+2+4];
186 if (stat(node
, &st
) < 0) {
187 log_warning("Couldn't stat device %s", node
);
191 if (!S_ISCHR(st
.st_mode
) && !S_ISBLK(st
.st_mode
)) {
192 log_warning("%s is not a device.", node
);
198 S_ISCHR(st
.st_mode
) ? 'c' : 'b',
199 major(st
.st_rdev
), minor(st
.st_rdev
),
202 r
= cg_set_attribute("devices", path
, "devices.allow", buf
);
204 log_full_errno(IN_SET(r
, -ENOENT
, -EROFS
, -EINVAL
) ? LOG_DEBUG
: LOG_WARNING
, r
,
205 "Failed to set devices.allow on %s: %m", path
);
210 static int whitelist_major(const char *path
, const char *name
, char type
, const char *acc
) {
211 _cleanup_fclose_
FILE *f
= NULL
;
218 assert(type
== 'b' || type
== 'c');
220 f
= fopen("/proc/devices", "re");
222 return log_warning_errno(errno
, "Cannot open /proc/devices to resolve %s (%c): %m", name
, type
);
224 FOREACH_LINE(line
, f
, goto fail
) {
225 char buf
[2+DECIMAL_STR_MAX(unsigned)+3+4], *p
, *w
;
230 if (type
== 'c' && streq(line
, "Character devices:")) {
235 if (type
== 'b' && streq(line
, "Block devices:")) {
250 w
= strpbrk(p
, WHITESPACE
);
255 r
= safe_atou(p
, &maj
);
262 w
+= strspn(w
, WHITESPACE
);
264 if (fnmatch(name
, w
, 0) != 0)
273 r
= cg_set_attribute("devices", path
, "devices.allow", buf
);
275 log_full_errno(IN_SET(r
, -ENOENT
, -EROFS
, -EINVAL
) ? LOG_DEBUG
: LOG_WARNING
, r
,
276 "Failed to set devices.allow on %s: %m", path
);
282 log_warning_errno(errno
, "Failed to read /proc/devices: %m");
286 void cgroup_context_apply(CGroupContext
*c
, CGroupControllerMask mask
, const char *path
, ManagerState state
) {
296 /* Some cgroup attributes are not supported on the root cgroup,
297 * hence silently ignore */
298 is_root
= isempty(path
) || path_equal(path
, "/");
300 /* Make sure we don't try to display messages with an empty path. */
303 /* We generally ignore errors caused by read-only mounted
304 * cgroup trees (assuming we are running in a container then),
305 * and missing cgroups, i.e. EROFS and ENOENT. */
307 if ((mask
& CGROUP_CPU
) && !is_root
) {
308 char buf
[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t
)) + 1];
310 sprintf(buf
, "%lu\n",
311 IN_SET(state
, MANAGER_STARTING
, MANAGER_INITIALIZING
) && c
->startup_cpu_shares
!= (unsigned long) -1 ? c
->startup_cpu_shares
:
312 c
->cpu_shares
!= (unsigned long) -1 ? c
->cpu_shares
: 1024);
313 r
= cg_set_attribute("cpu", path
, "cpu.shares", buf
);
315 log_full_errno(IN_SET(r
, -ENOENT
, -EROFS
) ? LOG_DEBUG
: LOG_WARNING
, r
,
316 "Failed to set cpu.shares on %s: %m", path
);
318 sprintf(buf
, USEC_FMT
"\n", CGROUP_CPU_QUOTA_PERIOD_USEC
);
319 r
= cg_set_attribute("cpu", path
, "cpu.cfs_period_us", buf
);
321 log_full_errno(IN_SET(r
, -ENOENT
, -EROFS
) ? LOG_DEBUG
: LOG_WARNING
, r
,
322 "Failed to set cpu.cfs_period_us on %s: %m", path
);
324 if (c
->cpu_quota_per_sec_usec
!= USEC_INFINITY
) {
325 sprintf(buf
, USEC_FMT
"\n", c
->cpu_quota_per_sec_usec
* CGROUP_CPU_QUOTA_PERIOD_USEC
/ USEC_PER_SEC
);
326 r
= cg_set_attribute("cpu", path
, "cpu.cfs_quota_us", buf
);
328 r
= cg_set_attribute("cpu", path
, "cpu.cfs_quota_us", "-1");
330 log_full_errno(IN_SET(r
, -ENOENT
, -EROFS
) ? LOG_DEBUG
: LOG_WARNING
, r
,
331 "Failed to set cpu.cfs_quota_us on %s: %m", path
);
334 if (mask
& CGROUP_BLKIO
) {
335 char buf
[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
336 DECIMAL_STR_MAX(dev_t
)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
337 DECIMAL_STR_MAX(dev_t
)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
338 CGroupBlockIODeviceWeight
*w
;
339 CGroupBlockIODeviceBandwidth
*b
;
342 sprintf(buf
, "%lu\n", IN_SET(state
, MANAGER_STARTING
, MANAGER_INITIALIZING
) && c
->startup_blockio_weight
!= (unsigned long) -1 ? c
->startup_blockio_weight
:
343 c
->blockio_weight
!= (unsigned long) -1 ? c
->blockio_weight
: 1000);
344 r
= cg_set_attribute("blkio", path
, "blkio.weight", buf
);
346 log_full_errno(IN_SET(r
, -ENOENT
, -EROFS
) ? LOG_DEBUG
: LOG_WARNING
, r
,
347 "Failed to set blkio.weight on %s: %m", path
);
349 /* FIXME: no way to reset this list */
350 LIST_FOREACH(device_weights
, w
, c
->blockio_device_weights
) {
353 r
= lookup_blkio_device(w
->path
, &dev
);
357 sprintf(buf
, "%u:%u %lu", major(dev
), minor(dev
), w
->weight
);
358 r
= cg_set_attribute("blkio", path
, "blkio.weight_device", buf
);
360 log_full_errno(IN_SET(r
, -ENOENT
, -EROFS
) ? LOG_DEBUG
: LOG_WARNING
, r
,
361 "Failed to set blkio.weight_device on %s: %m", path
);
365 /* FIXME: no way to reset this list */
366 LIST_FOREACH(device_bandwidths
, b
, c
->blockio_device_bandwidths
) {
370 r
= lookup_blkio_device(b
->path
, &dev
);
374 a
= b
->read
? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
376 sprintf(buf
, "%u:%u %" PRIu64
"\n", major(dev
), minor(dev
), b
->bandwidth
);
377 r
= cg_set_attribute("blkio", path
, a
, buf
);
379 log_full_errno(IN_SET(r
, -ENOENT
, -EROFS
) ? LOG_DEBUG
: LOG_WARNING
, r
,
380 "Failed to set %s on %s: %m", a
, path
);
384 if ((mask
& CGROUP_MEMORY
) && !is_root
) {
385 if (c
->memory_limit
!= (uint64_t) -1) {
386 char buf
[DECIMAL_STR_MAX(uint64_t) + 1];
388 sprintf(buf
, "%" PRIu64
"\n", c
->memory_limit
);
389 r
= cg_set_attribute("memory", path
, "memory.limit_in_bytes", buf
);
391 r
= cg_set_attribute("memory", path
, "memory.limit_in_bytes", "-1");
394 log_full_errno(IN_SET(r
, -ENOENT
, -EROFS
) ? LOG_DEBUG
: LOG_WARNING
, r
,
395 "Failed to set memory.limit_in_bytes on %s: %m", path
);
398 if ((mask
& CGROUP_DEVICE
) && !is_root
) {
399 CGroupDeviceAllow
*a
;
401 /* Changing the devices list of a populated cgroup
402 * might result in EINVAL, hence ignore EINVAL
405 if (c
->device_allow
|| c
->device_policy
!= CGROUP_AUTO
)
406 r
= cg_set_attribute("devices", path
, "devices.deny", "a");
408 r
= cg_set_attribute("devices", path
, "devices.allow", "a");
410 log_full_errno(IN_SET(r
, -ENOENT
, -EROFS
, -EINVAL
) ? LOG_DEBUG
: LOG_WARNING
, r
,
411 "Failed to reset devices.list on %s: %m", path
);
413 if (c
->device_policy
== CGROUP_CLOSED
||
414 (c
->device_policy
== CGROUP_AUTO
&& c
->device_allow
)) {
415 static const char auto_devices
[] =
416 "/dev/null\0" "rwm\0"
417 "/dev/zero\0" "rwm\0"
418 "/dev/full\0" "rwm\0"
419 "/dev/random\0" "rwm\0"
420 "/dev/urandom\0" "rwm\0"
422 "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
426 NULSTR_FOREACH_PAIR(x
, y
, auto_devices
)
427 whitelist_device(path
, x
, y
);
429 whitelist_major(path
, "pts", 'c', "rw");
430 whitelist_major(path
, "kdbus", 'c', "rw");
431 whitelist_major(path
, "kdbus/*", 'c', "rw");
434 LIST_FOREACH(device_allow
, a
, c
->device_allow
) {
450 if (startswith(a
->path
, "/dev/"))
451 whitelist_device(path
, a
->path
, acc
);
452 else if (startswith(a
->path
, "block-"))
453 whitelist_major(path
, a
->path
+ 6, 'b', acc
);
454 else if (startswith(a
->path
, "char-"))
455 whitelist_major(path
, a
->path
+ 5, 'c', acc
);
457 log_debug("Ignoring device %s while writing cgroup attribute.", a
->path
);
462 CGroupControllerMask
cgroup_context_get_mask(CGroupContext
*c
) {
463 CGroupControllerMask mask
= 0;
465 /* Figure out which controllers we need */
467 if (c
->cpu_accounting
||
468 c
->cpu_shares
!= (unsigned long) -1 ||
469 c
->startup_cpu_shares
!= (unsigned long) -1 ||
470 c
->cpu_quota_per_sec_usec
!= USEC_INFINITY
)
471 mask
|= CGROUP_CPUACCT
| CGROUP_CPU
;
473 if (c
->blockio_accounting
||
474 c
->blockio_weight
!= (unsigned long) -1 ||
475 c
->startup_blockio_weight
!= (unsigned long) -1 ||
476 c
->blockio_device_weights
||
477 c
->blockio_device_bandwidths
)
478 mask
|= CGROUP_BLKIO
;
480 if (c
->memory_accounting
||
481 c
->memory_limit
!= (uint64_t) -1)
482 mask
|= CGROUP_MEMORY
;
484 if (c
->device_allow
||
485 c
->device_policy
!= CGROUP_AUTO
)
486 mask
|= CGROUP_DEVICE
;
491 CGroupControllerMask
unit_get_cgroup_mask(Unit
*u
) {
494 c
= unit_get_cgroup_context(u
);
498 /* If delegation is turned on, then turn on all cgroups,
499 * unless the process we fork into it is known to drop
500 * privileges anyway, and shouldn't get access to the
501 * controllers anyway. */
506 e
= unit_get_exec_context(u
);
507 if (!e
|| exec_context_maintains_privileges(e
))
508 return _CGROUP_CONTROLLER_MASK_ALL
;
511 return cgroup_context_get_mask(c
);
514 CGroupControllerMask
unit_get_members_mask(Unit
*u
) {
517 if (u
->cgroup_members_mask_valid
)
518 return u
->cgroup_members_mask
;
520 u
->cgroup_members_mask
= 0;
522 if (u
->type
== UNIT_SLICE
) {
526 SET_FOREACH(member
, u
->dependencies
[UNIT_BEFORE
], i
) {
531 if (UNIT_DEREF(member
->slice
) != u
)
534 u
->cgroup_members_mask
|=
535 unit_get_cgroup_mask(member
) |
536 unit_get_members_mask(member
);
540 u
->cgroup_members_mask_valid
= true;
541 return u
->cgroup_members_mask
;
544 CGroupControllerMask
unit_get_siblings_mask(Unit
*u
) {
547 if (UNIT_ISSET(u
->slice
))
548 return unit_get_members_mask(UNIT_DEREF(u
->slice
));
550 return unit_get_cgroup_mask(u
) | unit_get_members_mask(u
);
553 CGroupControllerMask
unit_get_target_mask(Unit
*u
) {
554 CGroupControllerMask mask
;
556 mask
= unit_get_cgroup_mask(u
) | unit_get_members_mask(u
) | unit_get_siblings_mask(u
);
557 mask
&= u
->manager
->cgroup_supported
;
562 /* Recurse from a unit up through its containing slices, propagating
563 * mask bits upward. A unit is also member of itself. */
564 void unit_update_cgroup_members_masks(Unit
*u
) {
565 CGroupControllerMask m
;
570 /* Calculate subtree mask */
571 m
= unit_get_cgroup_mask(u
) | unit_get_members_mask(u
);
573 /* See if anything changed from the previous invocation. If
574 * not, we're done. */
575 if (u
->cgroup_subtree_mask_valid
&& m
== u
->cgroup_subtree_mask
)
579 u
->cgroup_subtree_mask_valid
&&
580 ((m
& ~u
->cgroup_subtree_mask
) != 0) &&
581 ((~m
& u
->cgroup_subtree_mask
) == 0);
583 u
->cgroup_subtree_mask
= m
;
584 u
->cgroup_subtree_mask_valid
= true;
586 if (UNIT_ISSET(u
->slice
)) {
587 Unit
*s
= UNIT_DEREF(u
->slice
);
590 /* There's more set now than before. We
591 * propagate the new mask to the parent's mask
592 * (not caring if it actually was valid or
595 s
->cgroup_members_mask
|= m
;
598 /* There's less set now than before (or we
599 * don't know), we need to recalculate
600 * everything, so let's invalidate the
601 * parent's members mask */
603 s
->cgroup_members_mask_valid
= false;
605 /* And now make sure that this change also hits our
607 unit_update_cgroup_members_masks(s
);
611 static const char *migrate_callback(CGroupControllerMask mask
, void *userdata
) {
618 if (u
->cgroup_path
&&
619 u
->cgroup_realized
&&
620 (u
->cgroup_realized_mask
& mask
) == mask
)
621 return u
->cgroup_path
;
623 u
= UNIT_DEREF(u
->slice
);
629 static int unit_create_cgroups(Unit
*u
, CGroupControllerMask mask
) {
635 c
= unit_get_cgroup_context(u
);
639 if (!u
->cgroup_path
) {
640 _cleanup_free_
char *path
= NULL
;
642 path
= unit_default_cgroup_path(u
);
646 r
= hashmap_put(u
->manager
->cgroup_unit
, path
, u
);
648 log_error(r
== -EEXIST
? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path
, strerror(-r
));
652 u
->cgroup_path
= path
;
657 /* First, create our own group */
658 r
= cg_create_everywhere(u
->manager
->cgroup_supported
, mask
, u
->cgroup_path
);
660 return log_error_errno(r
, "Failed to create cgroup %s: %m", u
->cgroup_path
);
662 /* Keep track that this is now realized */
663 u
->cgroup_realized
= true;
664 u
->cgroup_realized_mask
= mask
;
666 if (u
->type
!= UNIT_SLICE
&& !c
->delegate
) {
668 /* Then, possibly move things over, but not if
669 * subgroups may contain processes, which is the case
670 * for slice and delegation units. */
671 r
= cg_migrate_everywhere(u
->manager
->cgroup_supported
, u
->cgroup_path
, u
->cgroup_path
, migrate_callback
, u
);
673 log_warning_errno(r
, "Failed to migrate cgroup from to %s: %m", u
->cgroup_path
);
679 int unit_attach_pids_to_cgroup(Unit
*u
) {
683 r
= unit_realize_cgroup(u
);
687 r
= cg_attach_many_everywhere(u
->manager
->cgroup_supported
, u
->cgroup_path
, u
->pids
, migrate_callback
, u
);
694 static bool unit_has_mask_realized(Unit
*u
, CGroupControllerMask mask
) {
697 return u
->cgroup_realized
&& u
->cgroup_realized_mask
== mask
;
700 /* Check if necessary controllers and attributes for a unit are in place.
703 * If not, create paths, move processes over, and set attributes.
705 * Returns 0 on success and < 0 on failure. */
706 static int unit_realize_cgroup_now(Unit
*u
, ManagerState state
) {
707 CGroupControllerMask mask
;
712 if (u
->in_cgroup_queue
) {
713 LIST_REMOVE(cgroup_queue
, u
->manager
->cgroup_queue
, u
);
714 u
->in_cgroup_queue
= false;
717 mask
= unit_get_target_mask(u
);
719 if (unit_has_mask_realized(u
, mask
))
722 /* First, realize parents */
723 if (UNIT_ISSET(u
->slice
)) {
724 r
= unit_realize_cgroup_now(UNIT_DEREF(u
->slice
), state
);
729 /* And then do the real work */
730 r
= unit_create_cgroups(u
, mask
);
734 /* Finally, apply the necessary attributes. */
735 cgroup_context_apply(unit_get_cgroup_context(u
), mask
, u
->cgroup_path
, state
);
740 static void unit_add_to_cgroup_queue(Unit
*u
) {
742 if (u
->in_cgroup_queue
)
745 LIST_PREPEND(cgroup_queue
, u
->manager
->cgroup_queue
, u
);
746 u
->in_cgroup_queue
= true;
749 unsigned manager_dispatch_cgroup_queue(Manager
*m
) {
755 state
= manager_state(m
);
757 while ((i
= m
->cgroup_queue
)) {
758 assert(i
->in_cgroup_queue
);
760 r
= unit_realize_cgroup_now(i
, state
);
762 log_warning_errno(r
, "Failed to realize cgroups for queued unit %s: %m", i
->id
);
770 static void unit_queue_siblings(Unit
*u
) {
773 /* This adds the siblings of the specified unit and the
774 * siblings of all parent units to the cgroup queue. (But
775 * neither the specified unit itself nor the parents.) */
777 while ((slice
= UNIT_DEREF(u
->slice
))) {
781 SET_FOREACH(m
, slice
->dependencies
[UNIT_BEFORE
], i
) {
785 /* Skip units that have a dependency on the slice
786 * but aren't actually in it. */
787 if (UNIT_DEREF(m
->slice
) != slice
)
790 /* No point in doing cgroup application for units
791 * without active processes. */
792 if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m
)))
795 /* If the unit doesn't need any new controllers
796 * and has current ones realized, it doesn't need
798 if (unit_has_mask_realized(m
, unit_get_target_mask(m
)))
801 unit_add_to_cgroup_queue(m
);
808 int unit_realize_cgroup(Unit
*u
) {
811 if (!UNIT_HAS_CGROUP_CONTEXT(u
))
814 /* So, here's the deal: when realizing the cgroups for this
815 * unit, we need to first create all parents, but there's more
816 * actually: for the weight-based controllers we also need to
817 * make sure that all our siblings (i.e. units that are in the
818 * same slice as we are) have cgroups, too. Otherwise, things
819 * would become very uneven as each of their processes would
820 * get as much resources as all our group together. This call
821 * will synchronously create the parent cgroups, but will
822 * defer work on the siblings to the next event loop
825 /* Add all sibling slices to the cgroup queue. */
826 unit_queue_siblings(u
);
828 /* And realize this one now (and apply the values) */
829 return unit_realize_cgroup_now(u
, manager_state(u
->manager
));
832 void unit_destroy_cgroup_if_empty(Unit
*u
) {
840 r
= cg_trim_everywhere(u
->manager
->cgroup_supported
, u
->cgroup_path
, !unit_has_name(u
, SPECIAL_ROOT_SLICE
));
842 log_debug_errno(r
, "Failed to destroy cgroup %s: %m", u
->cgroup_path
);
846 hashmap_remove(u
->manager
->cgroup_unit
, u
->cgroup_path
);
848 free(u
->cgroup_path
);
849 u
->cgroup_path
= NULL
;
850 u
->cgroup_realized
= false;
851 u
->cgroup_realized_mask
= 0;
854 pid_t
unit_search_main_pid(Unit
*u
) {
855 _cleanup_fclose_
FILE *f
= NULL
;
856 pid_t pid
= 0, npid
, mypid
;
863 if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER
, u
->cgroup_path
, &f
) < 0)
867 while (cg_read_pid(f
, &npid
) > 0) {
873 /* Ignore processes that aren't our kids */
874 if (get_parent_of_pid(npid
, &ppid
) >= 0 && ppid
!= mypid
)
878 /* Dang, there's more than one daemonized PID
879 in this group, so we don't know what process
880 is the main process. */
891 int manager_setup_cgroup(Manager
*m
) {
892 _cleanup_free_
char *path
= NULL
;
897 /* 1. Determine hierarchy */
898 free(m
->cgroup_root
);
899 m
->cgroup_root
= NULL
;
901 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 0, &m
->cgroup_root
);
903 return log_error_errno(r
, "Cannot determine cgroup we are running in: %m");
905 /* LEGACY: Already in /system.slice? If so, let's cut this
906 * off. This is to support live upgrades from older systemd
907 * versions where PID 1 was moved there. */
908 if (m
->running_as
== MANAGER_SYSTEM
) {
911 e
= endswith(m
->cgroup_root
, "/" SPECIAL_SYSTEM_SLICE
);
913 e
= endswith(m
->cgroup_root
, "/system");
918 /* And make sure to store away the root value without trailing
919 * slash, even for the root dir, so that we can easily prepend
921 if (streq(m
->cgroup_root
, "/"))
922 m
->cgroup_root
[0] = 0;
925 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, m
->cgroup_root
, NULL
, &path
);
927 return log_error_errno(r
, "Cannot find cgroup mount point: %m");
929 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER
". File system hierarchy is at %s.", path
);
932 /* 3. Install agent */
933 if (m
->running_as
== MANAGER_SYSTEM
) {
934 r
= cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER
, SYSTEMD_CGROUP_AGENT_PATH
);
936 log_warning_errno(r
, "Failed to install release agent, ignoring: %m");
938 log_debug("Installed release agent.");
940 log_debug("Release agent already installed.");
943 /* 4. Make sure we are in the root cgroup */
944 r
= cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER
, m
->cgroup_root
, 0);
946 return log_error_errno(r
, "Failed to create root cgroup hierarchy: %m");
948 /* 5. And pin it, so that it cannot be unmounted */
949 safe_close(m
->pin_cgroupfs_fd
);
951 m
->pin_cgroupfs_fd
= open(path
, O_RDONLY
|O_CLOEXEC
|O_DIRECTORY
|O_NOCTTY
|O_NONBLOCK
);
952 if (m
->pin_cgroupfs_fd
< 0)
953 return log_error_errno(errno
, "Failed to open pin file: %m");
955 /* 6. Always enable hierarchical support if it exists... */
956 cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
959 /* 7. Figure out which controllers are supported */
960 m
->cgroup_supported
= cg_mask_supported();
965 void manager_shutdown_cgroup(Manager
*m
, bool delete) {
968 /* We can't really delete the group, since we are in it. But
970 if (delete && m
->cgroup_root
)
971 cg_trim(SYSTEMD_CGROUP_CONTROLLER
, m
->cgroup_root
, false);
973 m
->pin_cgroupfs_fd
= safe_close(m
->pin_cgroupfs_fd
);
975 free(m
->cgroup_root
);
976 m
->cgroup_root
= NULL
;
979 Unit
* manager_get_unit_by_cgroup(Manager
*m
, const char *cgroup
) {
986 u
= hashmap_get(m
->cgroup_unit
, cgroup
);
1000 u
= hashmap_get(m
->cgroup_unit
, p
);
1006 Unit
*manager_get_unit_by_pid(Manager
*m
, pid_t pid
) {
1007 _cleanup_free_
char *cgroup
= NULL
;
1016 u
= hashmap_get(m
->watch_pids1
, LONG_TO_PTR(pid
));
1020 u
= hashmap_get(m
->watch_pids2
, LONG_TO_PTR(pid
));
1024 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &cgroup
);
1028 return manager_get_unit_by_cgroup(m
, cgroup
);
1031 int manager_notify_cgroup_empty(Manager
*m
, const char *cgroup
) {
1038 u
= manager_get_unit_by_cgroup(m
, cgroup
);
1042 r
= cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER
, u
->cgroup_path
);
1046 if (UNIT_VTABLE(u
)->notify_cgroup_empty
)
1047 UNIT_VTABLE(u
)->notify_cgroup_empty(u
);
1049 unit_add_to_gc_queue(u
);
1053 int unit_get_memory_current(Unit
*u
, uint64_t *ret
) {
1054 _cleanup_free_
char *v
= NULL
;
1060 if (!u
->cgroup_path
)
1063 if ((u
->cgroup_realized_mask
& CGROUP_MEMORY
) == 0)
1066 r
= cg_get_attribute("memory", u
->cgroup_path
, "memory.usage_in_bytes", &v
);
1072 return safe_atou64(v
, ret
);
1075 static int unit_get_cpu_usage_raw(Unit
*u
, nsec_t
*ret
) {
1076 _cleanup_free_
char *v
= NULL
;
1083 if (!u
->cgroup_path
)
1086 if ((u
->cgroup_realized_mask
& CGROUP_CPUACCT
) == 0)
1089 r
= cg_get_attribute("cpuacct", u
->cgroup_path
, "cpuacct.usage", &v
);
1095 r
= safe_atou64(v
, &ns
);
1103 int unit_get_cpu_usage(Unit
*u
, nsec_t
*ret
) {
1107 r
= unit_get_cpu_usage_raw(u
, &ns
);
1111 if (ns
> u
->cpuacct_usage_base
)
1112 ns
-= u
->cpuacct_usage_base
;
1120 int unit_reset_cpu_usage(Unit
*u
) {
1126 r
= unit_get_cpu_usage_raw(u
, &ns
);
1128 u
->cpuacct_usage_base
= 0;
1132 u
->cpuacct_usage_base
= ns
;
1136 bool unit_cgroup_delegate(Unit
*u
) {
1141 c
= unit_get_cgroup_context(u
);
1148 static const char* const cgroup_device_policy_table
[_CGROUP_DEVICE_POLICY_MAX
] = {
1149 [CGROUP_AUTO
] = "auto",
1150 [CGROUP_CLOSED
] = "closed",
1151 [CGROUP_STRICT
] = "strict",
1154 DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy
, CGroupDevicePolicy
);