/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
25 #include "path-util.h"
27 #include "cgroup-util.h"
/* Reset a CGroupContext to kernel-default values: shares 1024, blkio
 * weight 1000, no memory limit, no CPU quota. The comment below says the
 * struct must already be zeroed by the caller.
 * NOTE(review): the embedded original line numbers jump (35-36 missing);
 * at least one assignment (presumably the plain cpu_shares default) and
 * the closing brace were dropped by the extraction — verify upstream. */
30 void cgroup_context_init(CGroupContext
*c
) {
33 /* Initialize everything to the kernel defaults, assuming the
34 * structure is preinitialized to 0 */
37 c
->startup_cpu_shares
= 1024;
38 c
->startup_cpu_shares_set
= false;
/* (uint64_t) -1 is the "no limit" sentinel used throughout this file */
39 c
->memory_limit
= (uint64_t) -1;
40 c
->blockio_weight
= 1000;
41 c
->startup_blockio_weight
= 1000;
42 c
->startup_blockio_weight_set
= false;
/* (usec_t) -1 means "no quota configured" — checked by the getters below */
44 c
->cpu_quota_per_sec_usec
= (usec_t
) -1;
45 c
->cpu_quota_usec
= (usec_t
) -1;
/* Default CFS period of 100ms */
46 c
->cpu_quota_period_usec
= 100*USEC_PER_MSEC
;
/* Unlink one DeviceAllow entry from the context's device_allow list.
 * NOTE(review): original lines 50-52 and 54 are missing from this extract
 * (likely asserts and the free() of the entry) — verify upstream. */
49 void cgroup_context_free_device_allow(CGroupContext
*c
, CGroupDeviceAllow
*a
) {
53 LIST_REMOVE(device_allow
, c
->device_allow
, a
);
/* Unlink one per-device blkio weight entry from the context's list.
 * NOTE(review): interior lines (asserts/free of the entry, closing brace)
 * are missing from this extract — verify upstream. */
58 void cgroup_context_free_blockio_device_weight(CGroupContext
*c
, CGroupBlockIODeviceWeight
*w
) {
62 LIST_REMOVE(device_weights
, c
->blockio_device_weights
, w
);
/* Unlink one per-device blkio bandwidth entry from the context's list.
 * NOTE(review): interior lines (asserts/free of the entry, closing brace)
 * are missing from this extract — verify upstream. */
67 void cgroup_context_free_blockio_device_bandwidth(CGroupContext
*c
, CGroupBlockIODeviceBandwidth
*b
) {
71 LIST_REMOVE(device_bandwidths
, c
->blockio_device_bandwidths
, b
);
/* Release all list entries held by the context by repeatedly freeing the
 * list heads until each list is empty. Relies on the free_* helpers above
 * unlinking the head element on every iteration. */
76 void cgroup_context_done(CGroupContext
*c
) {
79 while (c
->blockio_device_weights
)
80 cgroup_context_free_blockio_device_weight(c
, c
->blockio_device_weights
);
82 while (c
->blockio_device_bandwidths
)
83 cgroup_context_free_blockio_device_bandwidth(c
, c
->blockio_device_bandwidths
);
85 while (c
->device_allow
)
86 cgroup_context_free_device_allow(c
, c
->device_allow
);
/* Return the absolute CPU quota in microseconds: an explicitly set
 * cpu_quota_usec wins; otherwise derive it from the per-second quota
 * scaled by the configured period. (usec_t) -1 means "unset".
 * NOTE(review): the final fallback return for the all-unset case is
 * missing from this extract — verify upstream. */
89 usec_t
cgroup_context_get_cpu_quota_usec(CGroupContext
*c
) {
92 /* Returns the absolute CPU quota */
94 if (c
->cpu_quota_usec
!= (usec_t
) -1)
95 return c
->cpu_quota_usec
;
96 else if (c
->cpu_quota_per_sec_usec
!= (usec_t
) -1)
/* per-second quota scaled to the actual period length */
97 return c
->cpu_quota_per_sec_usec
*c
->cpu_quota_period_usec
/USEC_PER_SEC
;
/* Inverse of the getter above: return the CPU quota normalized to a one
 * second period. An absolute quota is rescaled by USEC_PER_SEC/period;
 * otherwise the per-second value is returned as-is.
 * NOTE(review): the final fallback return for the all-unset case is
 * missing from this extract — verify upstream. */
102 usec_t
cgroup_context_get_cpu_quota_per_sec_usec(CGroupContext
*c
) {
105 /* Returns the CPU quota relative to 1s */
107 if (c
->cpu_quota_usec
!= (usec_t
) -1)
108 return c
->cpu_quota_usec
*USEC_PER_SEC
/c
->cpu_quota_period_usec
;
109 else if (c
->cpu_quota_per_sec_usec
!= (usec_t
) -1)
110 return c
->cpu_quota_per_sec_usec
;
/* Pretty-print the whole cgroup context to f, each line prefixed with
 * `prefix` (NULL is tolerated via strempty()). Dumps the scalar settings
 * first, then one line per DeviceAllow / BlockIODeviceWeight /
 * BlockIODeviceBandwidth list entry.
 * NOTE(review): several original lines are missing from this extract —
 * the fprintf( calls themselves, a "%sCPUShares=" format line, and parts
 * of the per-entry format strings — so the format-string/argument pairing
 * below cannot be fully checked here; verify upstream. */
115 void cgroup_context_dump(CGroupContext
*c
, FILE* f
, const char *prefix
) {
116 CGroupBlockIODeviceBandwidth
*b
;
117 CGroupBlockIODeviceWeight
*w
;
118 CGroupDeviceAllow
*a
;
/* scratch buffers for the three formatted timespans below */
119 char t
[FORMAT_TIMESPAN_MAX
], s
[FORMAT_TIMESPAN_MAX
], u
[FORMAT_TIMESPAN_MAX
];
124 prefix
= strempty(prefix
);
/* Scalar settings: format string... */
127 "%sCPUAccounting=%s\n"
128 "%sBlockIOAccounting=%s\n"
129 "%sMemoryAccounting=%s\n"
131 "%sStartupCPUShares=%lu\n"
133 "%sCPUQuotaPerSecSec=%s\n"
134 "%sCPUQuotaPeriodSec=%s\n"
135 "%sBlockIOWeight=%lu\n"
136 "%sStartupBlockIOWeight=%lu\n"
137 "%sMemoryLimit=%" PRIu64
"\n"
138 "%sDevicePolicy=%s\n",
/* ...and the matching arguments, one prefix per line of output */
139 prefix
, yes_no(c
->cpu_accounting
),
140 prefix
, yes_no(c
->blockio_accounting
),
141 prefix
, yes_no(c
->memory_accounting
),
142 prefix
, c
->cpu_shares
,
143 prefix
, c
->startup_cpu_shares
,
144 prefix
, strna(format_timespan(u
, sizeof(u
), cgroup_context_get_cpu_quota_usec(c
), 1)),
145 prefix
, strna(format_timespan(t
, sizeof(t
), cgroup_context_get_cpu_quota_per_sec_usec(c
), 1)),
146 prefix
, strna(format_timespan(s
, sizeof(s
), c
->cpu_quota_period_usec
, 1)),
147 prefix
, c
->blockio_weight
,
148 prefix
, c
->startup_blockio_weight
,
149 prefix
, c
->memory_limit
,
150 prefix
, cgroup_device_policy_to_string(c
->device_policy
));
/* One DeviceAllow= line per entry, with its rwm permission flags */
152 LIST_FOREACH(device_allow
, a
, c
->device_allow
)
154 "%sDeviceAllow=%s %s%s%s\n",
157 a
->r
? "r" : "", a
->w
? "w" : "", a
->m
? "m" : "");
/* One BlockIODeviceWeight= line per entry */
159 LIST_FOREACH(device_weights
, w
, c
->blockio_device_weights
)
161 "%sBlockIODeviceWeight=%s %lu",
/* One read/write bandwidth line per entry, value humanized via format_bytes */
166 LIST_FOREACH(device_bandwidths
, b
, c
->blockio_device_bandwidths
) {
167 char buf
[FORMAT_BYTES_MAX
];
172 b
->read
? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
174 format_bytes(buf
, sizeof(buf
), b
->bandwidth
));
/* Resolve path p to the dev_t of the block device to use for blkio
 * attributes: a block-device node is used directly; otherwise the device
 * backing the filesystem p lives on is used, with block_get_whole_disk()
 * mapping a partition to its whole disk.
 * NOTE(review): stat() call, assignments to *dev, returns and closing
 * braces are missing from this extract — verify upstream. */
178 static int lookup_blkio_device(const char *p
, dev_t
*dev
) {
187 log_warning("Couldn't stat device %s: %m", p
);
191 if (S_ISBLK(st
.st_mode
))
/* major(st.st_dev) == 0 would mean a virtual/remote fs with no backing device */
193 else if (major(st
.st_dev
) != 0) {
194 /* If this is not a device node then find the block
195 * device this file is stored on */
198 /* If this is a partition, try to get the originating
200 block_get_whole_disk(*dev
, dev
);
202 log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p
);
/* Add a single device node to the cgroup's device whitelist by writing a
 * "<c|b> <major>:<minor> <acc>" entry to devices.allow. Non-device nodes
 * are rejected with a warning.
 * NOTE(review): the snprintf/sprintf composing buf, the early returns and
 * closing braces are missing from this extract — verify upstream. */
209 static int whitelist_device(const char *path
, const char *node
, const char *acc
) {
/* "c "/"b " + two decimal dev numbers + ':' + access string + NUL */
210 char buf
[2+DECIMAL_STR_MAX(dev_t
)*2+2+4];
217 if (stat(node
, &st
) < 0) {
218 log_warning("Couldn't stat device %s", node
);
222 if (!S_ISCHR(st
.st_mode
) && !S_ISBLK(st
.st_mode
)) {
223 log_warning("%s is not a device.", node
);
/* 'c' for char devices, 'b' for block devices, per devices.allow syntax */
229 S_ISCHR(st
.st_mode
) ? 'c' : 'b',
230 major(st
.st_rdev
), minor(st
.st_rdev
),
233 r
= cg_set_attribute("devices", path
, "devices.allow", buf
);
235 log_warning("Failed to set devices.allow on %s: %s", path
, strerror(-r
));
/* Whitelist every device major whose driver name matches the fnmatch
 * pattern `name`, scanning /proc/devices for the requested section
 * ('c' => "Character devices:", 'b' => "Block devices:") and writing one
 * devices.allow entry per matching major.
 * NOTE(review): many interior lines are missing from this extract
 * (section-state handling, the sprintf of buf, continue/return paths and
 * the fail: label body beyond its log line) — verify upstream. */
240 static int whitelist_major(const char *path
, const char *name
, char type
, const char *acc
) {
241 _cleanup_fclose_
FILE *f
= NULL
;
248 assert(type
== 'b' || type
== 'c');
/* "e" = O_CLOEXEC */
250 f
= fopen("/proc/devices", "re");
252 log_warning("Cannot open /proc/devices to resolve %s (%c): %m", name
, type
);
256 FOREACH_LINE(line
, f
, goto fail
) {
257 char buf
[2+DECIMAL_STR_MAX(unsigned)+3+4], *p
, *w
;
/* Track which section of /proc/devices we are in */
262 if (type
== 'c' && streq(line
, "Character devices:")) {
267 if (type
== 'b' && streq(line
, "Block devices:")) {
/* Split "<major> <driver-name>": find the whitespace after the number */
282 w
= strpbrk(p
, WHITESPACE
);
287 r
= safe_atou(p
, &maj
);
294 w
+= strspn(w
, WHITESPACE
);
/* fnmatch pattern match against the driver name (e.g. "kdbus/*") */
296 if (fnmatch(name
, w
, 0) != 0)
305 r
= cg_set_attribute("devices", path
, "devices.allow", buf
);
307 log_warning("Failed to set devices.allow on %s: %s", path
, strerror(-r
));
313 log_warning("Failed to read /proc/devices: %m");
/* Write the context's settings into the kernel cgroup attribute files at
 * `path`, for exactly the controllers selected in `mask`. Root-cgroup
 * restrictions: cpu and device settings are skipped when path is the
 * root. During MANAGER_STARTING the startup_* variants of cpu shares and
 * blkio weight are used instead of the normal values. All cg_set_attribute
 * failures are logged but not propagated (function returns void).
 * NOTE(review): numerous interior lines are missing from this extract
 * (variable declarations for r/q/dev/is_root/acc/x/y, several if(r<0)
 * guards, continue statements and closing braces) — verify upstream. */
317 void cgroup_context_apply(Manager
*m
, CGroupContext
*c
, CGroupControllerMask mask
, const char *path
) {
327 /* Some cgroup attributes are not support on the root cgroup,
328 * hence silently ignore */
329 is_root
= isempty(path
) || path_equal(path
, "/");
/* --- cpu controller: shares, period, quota --- */
331 if ((mask
& CGROUP_CPU
) && !is_root
) {
332 char buf
[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t
)) + 1];
/* startup_cpu_shares applies only while the manager is starting up */
335 sprintf(buf
, "%lu\n", manager_state(m
) == MANAGER_STARTING
336 ? c
->startup_cpu_shares
338 r
= cg_set_attribute("cpu", path
, "cpu.shares", buf
);
340 log_warning("Failed to set cpu.shares on %s: %s", path
, strerror(-r
));
342 sprintf(buf
, USEC_FMT
"\n", c
->cpu_quota_period_usec
);
343 r
= cg_set_attribute("cpu", path
, "cpu.cfs_period_us", buf
);
345 log_warning("Failed to set cpu.cfs_period_us on %s: %s", path
, strerror(-r
));
/* Absolute quota, or "-1" to disable quota when unset */
347 q
= cgroup_context_get_cpu_quota_usec(c
);
348 if (q
!= (usec_t
) -1) {
349 sprintf(buf
, USEC_FMT
"\n", q
);
350 r
= cg_set_attribute("cpu", path
, "cpu.cfs_quota_us", buf
);
352 r
= cg_set_attribute("cpu", path
, "cpu.cfs_quota_us", "-1");
354 log_warning("Failed to set cpu.cfs_quota_us on %s: %s", path
, strerror(-r
));
/* --- blkio controller: weight, per-device weight, throttling --- */
357 if (mask
& CGROUP_BLKIO
) {
/* buf sized for the largest of the three formats written below */
358 char buf
[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
359 DECIMAL_STR_MAX(dev_t
)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
360 DECIMAL_STR_MAX(dev_t
)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
361 CGroupBlockIODeviceWeight
*w
;
362 CGroupBlockIODeviceBandwidth
*b
;
365 sprintf(buf
, "%lu\n", manager_state(m
) == MANAGER_STARTING
366 ? c
->startup_blockio_weight
367 : c
->blockio_weight
);
368 r
= cg_set_attribute("blkio", path
, "blkio.weight", buf
);
370 log_warning("Failed to set blkio.weight on %s: %s", path
, strerror(-r
));
372 /* FIXME: no way to reset this list */
373 LIST_FOREACH(device_weights
, w
, c
->blockio_device_weights
) {
376 r
= lookup_blkio_device(w
->path
, &dev
);
380 sprintf(buf
, "%u:%u %lu", major(dev
), minor(dev
), w
->weight
);
381 r
= cg_set_attribute("blkio", path
, "blkio.weight_device", buf
);
383 log_error("Failed to set blkio.weight_device on %s: %s", path
, strerror(-r
));
387 /* FIXME: no way to reset this list */
388 LIST_FOREACH(device_bandwidths
, b
, c
->blockio_device_bandwidths
) {
392 r
= lookup_blkio_device(b
->path
, &dev
);
/* read vs write throttle file, chosen per entry */
396 a
= b
->read
? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
398 sprintf(buf
, "%u:%u %" PRIu64
"\n", major(dev
), minor(dev
), b
->bandwidth
);
399 r
= cg_set_attribute("blkio", path
, a
, buf
);
401 log_error("Failed to set %s on %s: %s", a
, path
, strerror(-r
));
/* --- memory controller: hard limit or reset to unlimited --- */
405 if (mask
& CGROUP_MEMORY
) {
406 if (c
->memory_limit
!= (uint64_t) -1) {
407 char buf
[DECIMAL_STR_MAX(uint64_t) + 1];
409 sprintf(buf
, "%" PRIu64
"\n", c
->memory_limit
);
410 r
= cg_set_attribute("memory", path
, "memory.limit_in_bytes", buf
);
412 r
= cg_set_attribute("memory", path
, "memory.limit_in_bytes", "-1");
415 log_error("Failed to set memory.limit_in_bytes on %s: %s", path
, strerror(-r
));
/* --- devices controller: deny-all then whitelist, or allow-all --- */
418 if ((mask
& CGROUP_DEVICE
) && !is_root
) {
419 CGroupDeviceAllow
*a
;
/* Any explicit allow entry or non-auto policy means "start from deny a" */
421 if (c
->device_allow
|| c
->device_policy
!= CGROUP_AUTO
)
422 r
= cg_set_attribute("devices", path
, "devices.deny", "a");
424 r
= cg_set_attribute("devices", path
, "devices.allow", "a");
426 log_warning("Failed to reset devices.list on %s: %s", path
, strerror(-r
));
428 if (c
->device_policy
== CGROUP_CLOSED
||
429 (c
->device_policy
== CGROUP_AUTO
&& c
->device_allow
)) {
/* NUL-separated (path, access) pairs of standard devices that are
 * always whitelisted under the closed/auto-with-allow policies */
430 static const char auto_devices
[] =
431 "/dev/null\0" "rwm\0"
432 "/dev/zero\0" "rwm\0"
433 "/dev/full\0" "rwm\0"
434 "/dev/random\0" "rwm\0"
435 "/dev/urandom\0" "rwm\0"
437 "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
441 NULSTR_FOREACH_PAIR(x
, y
, auto_devices
)
442 whitelist_device(path
, x
, y
);
444 whitelist_major(path
, "pts", 'c', "rw");
445 whitelist_major(path
, "kdbus", 'c', "rw");
446 whitelist_major(path
, "kdbus/*", 'c', "rw");
/* User-configured DeviceAllow= entries: "/dev/..." is a node path,
 * "block-<name>"/"char-<name>" select a whole driver major by name */
449 LIST_FOREACH(device_allow
, a
, c
->device_allow
) {
465 if (startswith(a
->path
, "/dev/"))
466 whitelist_device(path
, a
->path
, acc
);
467 else if (startswith(a
->path
, "block-"))
468 whitelist_major(path
, a
->path
+ 6, 'b', acc
);
469 else if (startswith(a
->path
, "char-"))
470 whitelist_major(path
, a
->path
+ 5, 'c', acc
);
472 log_debug("Ignoring device %s while writing cgroup attribute.", a
->path
);
/* Compute the set of cgroup controllers this context actually needs:
 * any non-default or explicitly-set value in a controller's settings
 * turns on the corresponding mask bit. As a side effect, once the
 * manager has left the STARTING state, the startup_*_set flags are
 * cleared so the startup values stop being considered.
 * NOTE(review): closing braces of the two big if-blocks and the final
 * return of `mask` are missing from this extract — verify upstream. */
477 CGroupControllerMask
cgroup_context_get_mask(Manager
*m
, CGroupContext
*c
) {
478 CGroupControllerMask mask
= 0;
480 /* Figure out which controllers we need */
/* cpu/cpuacct: accounting on, non-default shares (1024), a pending
 * startup-vs-normal shares transition, or any configured quota */
482 if (c
->cpu_accounting
||
483 (manager_state(m
) == MANAGER_STARTING
? c
->startup_cpu_shares
: c
->cpu_shares
) != 1024 ||
484 (manager_state(m
) != MANAGER_STARTING
&& c
->startup_cpu_shares_set
&& c
->startup_cpu_shares
!= c
->cpu_shares
) ||
485 c
->cpu_quota_usec
!= (usec_t
) -1 ||
486 c
->cpu_quota_per_sec_usec
!= (usec_t
) -1) {
487 mask
|= CGROUP_CPUACCT
| CGROUP_CPU
;
/* Once past startup, the one-shot startup shares no longer apply */
488 if (manager_state(m
) != MANAGER_STARTING
)
489 c
->startup_cpu_shares_set
= false;
/* blkio: accounting on, non-default weight (1000), pending startup
 * transition, or any per-device weight/bandwidth entries */
492 if (c
->blockio_accounting
||
493 (manager_state(m
) == MANAGER_STARTING
? c
->startup_blockio_weight
: c
->blockio_weight
) != 1000 ||
494 (manager_state(m
) != MANAGER_STARTING
&& c
->startup_blockio_weight_set
&& c
->startup_blockio_weight
!= c
->blockio_weight
) ||
495 c
->blockio_device_weights
||
496 c
->blockio_device_bandwidths
) {
497 mask
|= CGROUP_BLKIO
;
498 if (manager_state(m
) != MANAGER_STARTING
)
499 c
->startup_blockio_weight_set
= false;
/* memory: accounting on or a finite limit configured */
502 if (c
->memory_accounting
||
503 c
->memory_limit
!= (uint64_t) -1)
504 mask
|= CGROUP_MEMORY
;
/* devices: any allow entries or a non-auto policy */
506 if (c
->device_allow
|| c
->device_policy
!= CGROUP_AUTO
)
507 mask
|= CGROUP_DEVICE
;
/* Controller mask required by this unit's own cgroup context.
 * NOTE(review): the declaration of `c`, the NULL-context early return and
 * the closing brace are missing from this extract — verify upstream. */
512 CGroupControllerMask
unit_get_cgroup_mask(Unit
*u
) {
515 c
= unit_get_cgroup_context(u
);
519 return cgroup_context_get_mask(u
->manager
, c
);
/* Controller mask needed by the members of this unit, cached in
 * u->cgroup_members_mask. Only slices have members: for a slice the mask
 * is the union of each member's own mask plus that member's members mask
 * (recursive). Members are found via the UNIT_BEFORE dependency set,
 * filtered to units whose slice reference actually points back at u.
 * NOTE(review): iterator declarations, closing braces and possibly other
 * interior lines are missing from this extract — verify upstream. */
522 CGroupControllerMask
unit_get_members_mask(Unit
*u
) {
/* Cached result from a previous invocation */
525 if (u
->cgroup_members_mask_valid
)
526 return u
->cgroup_members_mask
;
528 u
->cgroup_members_mask
= 0;
530 if (u
->type
== UNIT_SLICE
) {
534 SET_FOREACH(member
, u
->dependencies
[UNIT_BEFORE
], i
) {
/* Depends on the slice but is not actually placed in it — skip */
539 if (UNIT_DEREF(member
->slice
) != u
)
542 u
->cgroup_members_mask
|=
543 unit_get_cgroup_mask(member
) |
544 unit_get_members_mask(member
);
548 u
->cgroup_members_mask_valid
= true;
549 return u
->cgroup_members_mask
;
/* Mask of weight-based controllers needed by this unit's siblings: the
 * members mask of the containing slice, or the unit's own+members mask
 * when it has no slice, restricted to cpu/blkio/cpuacct. */
552 CGroupControllerMask
unit_get_siblings_mask(Unit
*u
) {
553 CGroupControllerMask m
;
557 if (UNIT_ISSET(u
->slice
))
558 m
= unit_get_members_mask(UNIT_DEREF(u
->slice
));
560 m
= unit_get_cgroup_mask(u
) | unit_get_members_mask(u
);
562 /* Sibling propagation is only relevant for weight-based
563 * controllers, so let's mask out everything else */
564 return m
& (CGROUP_CPU
|CGROUP_BLKIO
|CGROUP_CPUACCT
);
/* Full mask the unit's cgroup should be realized with: own needs, plus
 * members, plus weight-based sibling controllers — limited to what the
 * running kernel actually supports (m->cgroup_supported).
 * NOTE(review): the final `return mask;` and closing brace are missing
 * from this extract — verify upstream. */
567 CGroupControllerMask
unit_get_target_mask(Unit
*u
) {
568 CGroupControllerMask mask
;
570 mask
= unit_get_cgroup_mask(u
) | unit_get_members_mask(u
) | unit_get_siblings_mask(u
);
571 mask
&= u
->manager
->cgroup_supported
;
576 /* Recurse from a unit up through its containing slices, propagating
577 * mask bits upward. A unit is also member of itself. */
/* Recomputes u->cgroup_subtree_mask and, when it changed, updates (or
 * invalidates) the parent slice's cached members mask and recurses up.
 * NOTE(review): several interior lines are missing from this extract —
 * the early return, the assignment target of the bits-only-added
 * expression (lines 593-595), if/else keywords and closing braces —
 * verify upstream. */
578 void unit_update_cgroup_members_masks(Unit
*u
) {
579 CGroupControllerMask m
;
584 /* Calculate subtree mask */
585 m
= unit_get_cgroup_mask(u
) | unit_get_members_mask(u
);
587 /* See if anything changed from the previous invocation. If
588 * not, we're done. */
589 if (u
->cgroup_subtree_mask_valid
&& m
== u
->cgroup_subtree_mask
)
/* True iff bits were only added relative to the previous (valid) mask:
 * something new in m, and nothing that was set before is now cleared */
593 u
->cgroup_subtree_mask_valid
&&
594 ((m
& ~u
->cgroup_subtree_mask
) != 0) &&
595 ((~m
& u
->cgroup_subtree_mask
) == 0);
597 u
->cgroup_subtree_mask
= m
;
598 u
->cgroup_subtree_mask_valid
= true;
600 if (UNIT_ISSET(u
->slice
)) {
601 Unit
*s
= UNIT_DEREF(u
->slice
);
604 /* There's more set now than before. We
605 * propagate the new mask to the parent's mask
606 * (not caring if it actually was valid or
609 s
->cgroup_members_mask
|= m
;
612 /* There's less set now than before (or we
613 * don't know), we need to recalculate
614 * everything, so let's invalidate the
615 * parent's members mask */
617 s
->cgroup_members_mask_valid
= false;
619 /* And now make sure that this change also hits our
621 unit_update_cgroup_members_masks(s
);
/* cg_migrate_everywhere() callback: walk up from the unit in userdata
 * through its slices and return the cgroup path of the nearest ancestor
 * that is realized with all the controllers in `mask`.
 * NOTE(review): the declaration/cast of `u` from userdata, the enclosing
 * loop construct and the final return are missing from this extract —
 * verify upstream. */
625 static const char *migrate_callback(CGroupControllerMask mask
, void *userdata
) {
632 if (u
->cgroup_path
&&
633 u
->cgroup_realized
&&
/* realized mask must cover every requested controller bit */
634 (u
->cgroup_realized_mask
& mask
) == mask
)
635 return u
->cgroup_path
;
/* Otherwise try the containing slice */
637 u
= UNIT_DEREF(u
->slice
);
/* Create (and register) the unit's cgroup for the given controller mask:
 * compute the default path, enter it into the manager's cgroup_unit
 * hashmap, create the hierarchy everywhere, mark the unit realized, and
 * migrate existing processes over (using migrate_callback to find a
 * realized ancestor path per controller).
 * NOTE(review): declaration of r, OOM check on path, several error
 * returns, ownership-transfer of `path` out of _cleanup_free_, and the
 * final return are missing from this extract — verify upstream. */
643 static int unit_create_cgroups(Unit
*u
, CGroupControllerMask mask
) {
644 _cleanup_free_
char *path
= NULL
;
649 path
= unit_default_cgroup_path(u
);
/* Register path -> unit so cgroup notifications can be routed back */
653 r
= hashmap_put(u
->manager
->cgroup_unit
, path
, u
);
655 log_error(r
== -EEXIST
? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path
, strerror(-r
));
659 u
->cgroup_path
= path
;
663 /* First, create our own group */
664 r
= cg_create_everywhere(u
->manager
->cgroup_supported
, mask
, u
->cgroup_path
);
666 log_error("Failed to create cgroup %s: %s", u
->cgroup_path
, strerror(-r
));
670 /* Keep track that this is now realized */
671 u
->cgroup_realized
= true;
672 u
->cgroup_realized_mask
= mask
;
674 /* Then, possibly move things over */
675 r
= cg_migrate_everywhere(u
->manager
->cgroup_supported
, u
->cgroup_path
, u
->cgroup_path
, migrate_callback
, u
);
677 log_warning("Failed to migrate cgroup from to %s: %s", u
->cgroup_path
, strerror(-r
));
/* True iff the unit's cgroup already exists with exactly this controller
 * mask — used to short-circuit re-realization.
 * NOTE(review): asserts and closing brace missing from this extract. */
682 static bool unit_has_mask_realized(Unit
*u
, CGroupControllerMask mask
) {
685 return u
->cgroup_realized
&& u
->cgroup_realized_mask
== mask
;
688 /* Check if necessary controllers and attributes for a unit are in place.
691 * If not, create paths, move processes over, and set attributes.
693 * Returns 0 on success and < 0 on failure. */
/* Synchronous realization: dequeue the unit if queued, no-op when the
 * target mask is already realized, realize the parent slice first
 * (recursively), then create/migrate this unit's cgroup and apply the
 * context's attribute values.
 * NOTE(review): declaration of r, error-propagating returns and the final
 * return/closing brace are missing from this extract — verify upstream. */
694 static int unit_realize_cgroup_now(Unit
*u
) {
695 CGroupControllerMask mask
;
/* Remove from the deferred-work queue — we are handling it now */
700 if (u
->in_cgroup_queue
) {
701 LIST_REMOVE(cgroup_queue
, u
->manager
->cgroup_queue
, u
);
702 u
->in_cgroup_queue
= false;
705 mask
= unit_get_target_mask(u
);
/* Already realized with exactly this mask — nothing to do */
707 if (unit_has_mask_realized(u
, mask
))
710 /* First, realize parents */
711 if (UNIT_ISSET(u
->slice
)) {
712 r
= unit_realize_cgroup_now(UNIT_DEREF(u
->slice
));
717 /* And then do the real work */
718 r
= unit_create_cgroups(u
, mask
);
722 /* Finally, apply the necessary attributes. */
723 cgroup_context_apply(u
->manager
, unit_get_cgroup_context(u
), mask
, u
->cgroup_path
);
/* Queue the unit for deferred cgroup realization by
 * manager_dispatch_cgroup_queue(); idempotent via in_cgroup_queue.
 * NOTE(review): the early `return` body of the guard and the closing
 * brace are missing from this extract — verify upstream. */
728 static void unit_add_to_cgroup_queue(Unit
*u
) {
730 if (u
->in_cgroup_queue
)
733 LIST_PREPEND(cgroup_queue
, u
->manager
->cgroup_queue
, u
);
734 u
->in_cgroup_queue
= true;
/* Drain the manager's cgroup queue, realizing each queued unit in turn.
 * unit_realize_cgroup_now() removes the unit from the queue itself, so
 * the loop terminates. Failures are logged, not fatal.
 * NOTE(review): declarations of i/r/the processed-units counter and the
 * final return are missing from this extract — verify upstream. */
737 unsigned manager_dispatch_cgroup_queue(Manager
*m
) {
742 while ((i
= m
->cgroup_queue
)) {
743 assert(i
->in_cgroup_queue
);
745 r
= unit_realize_cgroup_now(i
);
747 log_warning("Failed to realize cgroups for queued unit %s: %s", i
->id
, strerror(-r
));
/* Walk up the slice chain; at every level, add each sibling unit (member
 * of the slice that still needs realization work) to the cgroup queue.
 * NOTE(review): declarations of slice/m/i, the `continue` statements for
 * the three skip-conditions, the u = slice step and closing braces are
 * missing from this extract — verify upstream. */
755 static void unit_queue_siblings(Unit
*u
) {
758 /* This adds the siblings of the specified unit and the
759 * siblings of all parent units to the cgroup queue. (But
760 * neither the specified unit itself nor the parents.) */
762 while ((slice
= UNIT_DEREF(u
->slice
))) {
766 SET_FOREACH(m
, slice
->dependencies
[UNIT_BEFORE
], i
) {
770 /* Skip units that have a dependency on the slice
771 * but aren't actually in it. */
772 if (UNIT_DEREF(m
->slice
) != slice
)
775 /* No point in doing cgroup application for units
776 * without active processes. */
777 if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m
)))
780 /* If the unit doesn't need any new controllers
781 * and has current ones realized, it doesn't need
783 if (unit_has_mask_realized(m
, unit_get_target_mask(m
)))
786 unit_add_to_cgroup_queue(m
);
/* Public entry point: queue all siblings for deferred realization, then
 * synchronously realize this unit (which also realizes its parents).
 * NOTE(review): the declaration of `c` and the no-context early return
 * are missing from this extract — verify upstream. */
793 int unit_realize_cgroup(Unit
*u
) {
798 c
= unit_get_cgroup_context(u
);
802 /* So, here's the deal: when realizing the cgroups for this
803 * unit, we need to first create all parents, but there's more
804 * actually: for the weight-based controllers we also need to
805 * make sure that all our siblings (i.e. units that are in the
806 * same slice as we are) have cgroups, too. Otherwise, things
807 * would become very uneven as each of their processes would
808 * get as much resources as all our group together. This call
809 * will synchronously create the parent cgroups, but will
810 * defer work on the siblings to the next event loop
813 /* Add all sibling slices to the cgroup queue. */
814 unit_queue_siblings(u
);
816 /* And realize this one now (and apply the values) */
817 return unit_realize_cgroup_now(u
);
/* Tear down the unit's cgroup: trim it everywhere (the root slice is
 * never deleted itself — note the !unit_has_name(...) argument), drop the
 * path from the manager's lookup hashmap, free the stored path and clear
 * the realized state.
 * NOTE(review): the declaration of r and the no-cgroup-path early return
 * are missing from this extract — verify upstream. */
820 void unit_destroy_cgroup(Unit
*u
) {
828 r
= cg_trim_everywhere(u
->manager
->cgroup_supported
, u
->cgroup_path
, !unit_has_name(u
, SPECIAL_ROOT_SLICE
));
830 log_debug("Failed to destroy cgroup %s: %s", u
->cgroup_path
, strerror(-r
));
832 hashmap_remove(u
->manager
->cgroup_unit
, u
->cgroup_path
);
834 free(u
->cgroup_path
);
835 u
->cgroup_path
= NULL
;
836 u
->cgroup_realized
= false;
837 u
->cgroup_realized_mask
= 0;
/* Scan the unit's cgroup for a plausible "main" PID: enumerate the member
 * processes, skip our own children (they are managed explicitly), and —
 * per the comment below — give up when more than one daemonized PID is
 * found, since the main process is then ambiguous.
 * NOTE(review): declarations of ppid, the mypid = getpid() assignment,
 * the pid bookkeeping inside the loop and the final return are missing
 * from this extract — verify upstream. */
841 pid_t
unit_search_main_pid(Unit
*u
) {
842 _cleanup_fclose_
FILE *f
= NULL
;
843 pid_t pid
= 0, npid
, mypid
;
850 if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER
, u
->cgroup_path
, &f
) < 0)
854 while (cg_read_pid(f
, &npid
) > 0) {
860 /* Ignore processes that aren't our kids */
861 if (get_parent_of_pid(npid
, &ppid
) >= 0 && ppid
!= mypid
)
865 /* Dang, there's more than one daemonized PID
866 in this group, so we don't know what process
867 is the main process. */
/* One-time manager cgroup setup, in numbered steps (see the comments):
 * determine the hierarchy root we run in (with a legacy /system fixup),
 * find the mount point, install the release agent (system instance only),
 * attach ourselves to the root group, pin the cgroupfs mount with an open
 * fd, detect supported controllers and enable memory.use_hierarchy.
 * NOTE(review): declarations of r/e, several error returns, the string
 * truncation at `e`, and the final return are missing from this extract —
 * verify upstream. */
878 int manager_setup_cgroup(Manager
*m
) {
879 _cleanup_free_
char *path
= NULL
;
885 /* 1. Determine hierarchy */
886 free(m
->cgroup_root
);
887 m
->cgroup_root
= NULL
;
889 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 0, &m
->cgroup_root
);
891 log_error("Cannot determine cgroup we are running in: %s", strerror(-r
));
895 /* LEGACY: Already in /system.slice? If so, let's cut this
896 * off. This is to support live upgrades from older systemd
897 * versions where PID 1 was moved there. */
898 if (m
->running_as
== SYSTEMD_SYSTEM
) {
899 e
= endswith(m
->cgroup_root
, "/" SPECIAL_SYSTEM_SLICE
);
901 e
= endswith(m
->cgroup_root
, "/system");
906 /* And make sure to store away the root value without trailing
907 * slash, even for the root dir, so that we can easily prepend
909 if (streq(m
->cgroup_root
, "/"))
910 m
->cgroup_root
[0] = 0;
/* 2. Resolve the controller's filesystem mount point */
913 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, m
->cgroup_root
, NULL
, &path
);
915 log_error("Cannot find cgroup mount point: %s", strerror(-r
));
919 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER
". File system hierarchy is at %s.", path
);
921 /* 3. Install agent */
922 if (m
->running_as
== SYSTEMD_SYSTEM
) {
923 r
= cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER
, SYSTEMD_CGROUP_AGENT_PATH
);
925 log_warning("Failed to install release agent, ignoring: %s", strerror(-r
));
927 log_debug("Installed release agent.");
929 log_debug("Release agent already installed.");
932 /* 4. Make sure we are in the root cgroup */
933 r
= cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER
, m
->cgroup_root
, 0);
935 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r
));
939 /* 5. And pin it, so that it cannot be unmounted */
940 safe_close(m
->pin_cgroupfs_fd
);
942 m
->pin_cgroupfs_fd
= open(path
, O_RDONLY
|O_CLOEXEC
|O_DIRECTORY
|O_NOCTTY
|O_NONBLOCK
);
943 if (m
->pin_cgroupfs_fd
< 0) {
944 log_error("Failed to open pin file: %m");
948 /* 6. Figure out which controllers are supported */
949 m
->cgroup_supported
= cg_mask_supported();
951 /* 7. Always enable hierarchial support if it exists... */
952 cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
/* Shutdown counterpart of manager_setup_cgroup(): optionally trim the
 * hierarchy (we cannot delete the group we are inside of), close the
 * cgroupfs pin fd and free the stored root path. */
957 void manager_shutdown_cgroup(Manager
*m
, bool delete) {
960 /* We can't really delete the group, since we are in it. But
962 if (delete && m
->cgroup_root
)
963 cg_trim(SYSTEMD_CGROUP_CONTROLLER
, m
->cgroup_root
, false);
/* safe_close() returns -1, resetting the stored fd */
965 m
->pin_cgroupfs_fd
= safe_close(m
->pin_cgroupfs_fd
);
967 free(m
->cgroup_root
);
968 m
->cgroup_root
= NULL
;
/* Map a cgroup path to its Unit: try an exact hashmap lookup first, then
 * (per the second lookup on a derived path `p`) fall back to ancestor
 * paths so processes in sub-cgroups are attributed to the owning unit.
 * NOTE(review): declarations of u/p, the loop/truncation logic between
 * the two lookups, and the returns are missing from this extract —
 * verify upstream. */
971 Unit
* manager_get_unit_by_cgroup(Manager
*m
, const char *cgroup
) {
978 u
= hashmap_get(m
->cgroup_unit
, cgroup
);
992 u
= hashmap_get(m
->cgroup_unit
, p
);
/* Map a PID to its Unit by asking the kernel which cgroup the PID is in
 * and delegating to manager_get_unit_by_cgroup().
 * NOTE(review): declaration of r and its error handling are missing from
 * this extract — verify upstream. */
998 Unit
*manager_get_unit_by_pid(Manager
*m
, pid_t pid
) {
999 _cleanup_free_
char *cgroup
= NULL
;
1007 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &cgroup
);
1011 return manager_get_unit_by_cgroup(m
, cgroup
);
/* Handle a "cgroup became empty" notification: resolve the owning unit,
 * re-check emptiness recursively, invoke the unit type's
 * notify_cgroup_empty vtable hook if present, and queue the unit for GC.
 * NOTE(review): declarations of u/r, intermediate guards and the final
 * return are missing from this extract — verify upstream. */
1014 int manager_notify_cgroup_empty(Manager
*m
, const char *cgroup
) {
1021 u
= manager_get_unit_by_cgroup(m
, cgroup
);
1023 r
= cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER
, u
->cgroup_path
, true);
1025 if (UNIT_VTABLE(u
)->notify_cgroup_empty
)
1026 UNIT_VTABLE(u
)->notify_cgroup_empty(u
);
1028 unit_add_to_gc_queue(u
);
/* String names for CGroupDevicePolicy values, consumed by the
 * DEFINE_STRING_TABLE_LOOKUP macro below to generate the
 * cgroup_device_policy_to_string()/_from_string() pair.
 * NOTE(review): the table's closing `};` (original line ~1039) is missing
 * from this extract — verify upstream. */
1035 static const char* const cgroup_device_policy_table
[_CGROUP_DEVICE_POLICY_MAX
] = {
1036 [CGROUP_AUTO
] = "auto",
1037 [CGROUP_CLOSED
] = "closed",
1038 [CGROUP_STRICT
] = "strict",
1041 DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy
, CGroupDevicePolicy
);