]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/cgroup.c
util: replace close_nointr_nofail() by a more useful safe_close()
[thirdparty/systemd.git] / src / core / cgroup.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2013 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <fcntl.h>
23 #include <fnmatch.h>
24
25 #include "path-util.h"
26 #include "special.h"
27 #include "cgroup-util.h"
28 #include "cgroup.h"
29
30 void cgroup_context_init(CGroupContext *c) {
31 assert(c);
32
33 /* Initialize everything to the kernel defaults, assuming the
34 * structure is preinitialized to 0 */
35
36 c->cpu_shares = 1024;
37 c->memory_limit = (uint64_t) -1;
38 c->blockio_weight = 1000;
39 }
40
41 void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
42 assert(c);
43 assert(a);
44
45 LIST_REMOVE(device_allow, c->device_allow, a);
46 free(a->path);
47 free(a);
48 }
49
50 void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
51 assert(c);
52 assert(w);
53
54 LIST_REMOVE(device_weights, c->blockio_device_weights, w);
55 free(w->path);
56 free(w);
57 }
58
59 void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
60 assert(c);
61 assert(b);
62
63 LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
64 free(b->path);
65 free(b);
66 }
67
68 void cgroup_context_done(CGroupContext *c) {
69 assert(c);
70
71 while (c->blockio_device_weights)
72 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
73
74 while (c->blockio_device_bandwidths)
75 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
76
77 while (c->device_allow)
78 cgroup_context_free_device_allow(c, c->device_allow);
79 }
80
81 void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
82 CGroupBlockIODeviceBandwidth *b;
83 CGroupBlockIODeviceWeight *w;
84 CGroupDeviceAllow *a;
85
86 assert(c);
87 assert(f);
88
89 prefix = strempty(prefix);
90
91 fprintf(f,
92 "%sCPUAccounting=%s\n"
93 "%sBlockIOAccounting=%s\n"
94 "%sMemoryAccounting=%s\n"
95 "%sCPUShares=%lu\n"
96 "%sBlockIOWeight=%lu\n"
97 "%sMemoryLimit=%" PRIu64 "\n"
98 "%sDevicePolicy=%s\n",
99 prefix, yes_no(c->cpu_accounting),
100 prefix, yes_no(c->blockio_accounting),
101 prefix, yes_no(c->memory_accounting),
102 prefix, c->cpu_shares,
103 prefix, c->blockio_weight,
104 prefix, c->memory_limit,
105 prefix, cgroup_device_policy_to_string(c->device_policy));
106
107 LIST_FOREACH(device_allow, a, c->device_allow)
108 fprintf(f,
109 "%sDeviceAllow=%s %s%s%s\n",
110 prefix,
111 a->path,
112 a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
113
114 LIST_FOREACH(device_weights, w, c->blockio_device_weights)
115 fprintf(f,
116 "%sBlockIODeviceWeight=%s %lu",
117 prefix,
118 w->path,
119 w->weight);
120
121 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
122 char buf[FORMAT_BYTES_MAX];
123
124 fprintf(f,
125 "%s%s=%s %s\n",
126 prefix,
127 b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
128 b->path,
129 format_bytes(buf, sizeof(buf), b->bandwidth));
130 }
131 }
132
/* Resolve the path 'p' to a device number, stored in *dev.
 *
 * A block device node yields its own device number. Any other file
 * yields the device it is stored on, after block_get_whole_disk()
 * had a chance to replace it.
 *
 * Returns 0 on success, -errno if stat() fails, and -ENODEV if the
 * file is neither a block device nor stored on a device with a
 * non-zero major number. */
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;

        assert(p);
        assert(dev);

        if (stat(p, &st) < 0) {
                log_warning("Couldn't stat device %s: %m", p);
                return -errno;
        }

        if (S_ISBLK(st.st_mode)) {
                *dev = st.st_rdev;
                return 0;
        }

        if (major(st.st_dev) == 0) {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        /* Not a device node, hence use the block device this file
         * is stored on */
        *dev = st.st_dev;

        /* If this is a partition, try to get the originating block
         * device. The result is deliberately not checked. */
        block_get_whole_disk(*dev, dev);

        return 0;
}
163
164 static int whitelist_device(const char *path, const char *node, const char *acc) {
165 char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
166 struct stat st;
167 int r;
168
169 assert(path);
170 assert(acc);
171
172 if (stat(node, &st) < 0) {
173 log_warning("Couldn't stat device %s", node);
174 return -errno;
175 }
176
177 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
178 log_warning("%s is not a device.", node);
179 return -ENODEV;
180 }
181
182 sprintf(buf,
183 "%c %u:%u %s",
184 S_ISCHR(st.st_mode) ? 'c' : 'b',
185 major(st.st_rdev), minor(st.st_rdev),
186 acc);
187
188 r = cg_set_attribute("devices", path, "devices.allow", buf);
189 if (r < 0)
190 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
191
192 return r;
193 }
194
195 static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
196 _cleanup_fclose_ FILE *f = NULL;
197 char line[LINE_MAX];
198 bool good = false;
199 int r;
200
201 assert(path);
202 assert(acc);
203 assert(type == 'b' || type == 'c');
204
205 f = fopen("/proc/devices", "re");
206 if (!f) {
207 log_warning("Cannot open /proc/devices to resolve %s (%c): %m", name, type);
208 return -errno;
209 }
210
211 FOREACH_LINE(line, f, goto fail) {
212 char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
213 unsigned maj;
214
215 truncate_nl(line);
216
217 if (type == 'c' && streq(line, "Character devices:")) {
218 good = true;
219 continue;
220 }
221
222 if (type == 'b' && streq(line, "Block devices:")) {
223 good = true;
224 continue;
225 }
226
227 if (isempty(line)) {
228 good = false;
229 continue;
230 }
231
232 if (!good)
233 continue;
234
235 p = strstrip(line);
236
237 w = strpbrk(p, WHITESPACE);
238 if (!w)
239 continue;
240 *w = 0;
241
242 r = safe_atou(p, &maj);
243 if (r < 0)
244 continue;
245 if (maj <= 0)
246 continue;
247
248 w++;
249 w += strspn(w, WHITESPACE);
250
251 if (fnmatch(name, w, 0) != 0)
252 continue;
253
254 sprintf(buf,
255 "%c %u:* %s",
256 type,
257 maj,
258 acc);
259
260 r = cg_set_attribute("devices", path, "devices.allow", buf);
261 if (r < 0)
262 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
263 }
264
265 return 0;
266
267 fail:
268 log_warning("Failed to read /proc/devices: %m");
269 return -errno;
270 }
271
272 void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path) {
273 bool is_root;
274 int r;
275
276 assert(c);
277 assert(path);
278
279 if (mask == 0)
280 return;
281
282 /* Some cgroup attributes are not support on the root cgroup,
283 * hence silently ignore */
284 is_root = isempty(path) || path_equal(path, "/");
285
286 if ((mask & CGROUP_CPU) && !is_root) {
287 char buf[DECIMAL_STR_MAX(unsigned long) + 1];
288
289 sprintf(buf, "%lu\n", c->cpu_shares);
290 r = cg_set_attribute("cpu", path, "cpu.shares", buf);
291 if (r < 0)
292 log_warning("Failed to set cpu.shares on %s: %s", path, strerror(-r));
293 }
294
295 if (mask & CGROUP_BLKIO) {
296 char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
297 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
298 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
299 CGroupBlockIODeviceWeight *w;
300 CGroupBlockIODeviceBandwidth *b;
301
302 if (!is_root) {
303 sprintf(buf, "%lu\n", c->blockio_weight);
304 r = cg_set_attribute("blkio", path, "blkio.weight", buf);
305 if (r < 0)
306 log_warning("Failed to set blkio.weight on %s: %s", path, strerror(-r));
307
308 /* FIXME: no way to reset this list */
309 LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
310 dev_t dev;
311
312 r = lookup_blkio_device(w->path, &dev);
313 if (r < 0)
314 continue;
315
316 sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
317 r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
318 if (r < 0)
319 log_error("Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
320 }
321 }
322
323 /* FIXME: no way to reset this list */
324 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
325 const char *a;
326 dev_t dev;
327
328 r = lookup_blkio_device(b->path, &dev);
329 if (r < 0)
330 continue;
331
332 a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
333
334 sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
335 r = cg_set_attribute("blkio", path, a, buf);
336 if (r < 0)
337 log_error("Failed to set %s on %s: %s", a, path, strerror(-r));
338 }
339 }
340
341 if (mask & CGROUP_MEMORY) {
342 if (c->memory_limit != (uint64_t) -1) {
343 char buf[DECIMAL_STR_MAX(uint64_t) + 1];
344
345 sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
346 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
347 } else
348 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
349
350 if (r < 0)
351 log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
352 }
353
354 if ((mask & CGROUP_DEVICE) && !is_root) {
355 CGroupDeviceAllow *a;
356
357 if (c->device_allow || c->device_policy != CGROUP_AUTO)
358 r = cg_set_attribute("devices", path, "devices.deny", "a");
359 else
360 r = cg_set_attribute("devices", path, "devices.allow", "a");
361 if (r < 0)
362 log_warning("Failed to reset devices.list on %s: %s", path, strerror(-r));
363
364 if (c->device_policy == CGROUP_CLOSED ||
365 (c->device_policy == CGROUP_AUTO && c->device_allow)) {
366 static const char auto_devices[] =
367 "/dev/null\0" "rw\0"
368 "/dev/zero\0" "rw\0"
369 "/dev/full\0" "rw\0"
370 "/dev/random\0" "rw\0"
371 "/dev/urandom\0" "rw\0";
372
373 const char *x, *y;
374
375 NULSTR_FOREACH_PAIR(x, y, auto_devices)
376 whitelist_device(path, x, y);
377 }
378
379 LIST_FOREACH(device_allow, a, c->device_allow) {
380 char acc[4];
381 unsigned k = 0;
382
383 if (a->r)
384 acc[k++] = 'r';
385 if (a->w)
386 acc[k++] = 'w';
387 if (a->m)
388 acc[k++] = 'm';
389
390 if (k == 0)
391 continue;
392
393 acc[k++] = 0;
394
395 if (startswith(a->path, "/dev/"))
396 whitelist_device(path, a->path, acc);
397 else if (startswith(a->path, "block-"))
398 whitelist_major(path, a->path + 6, 'b', acc);
399 else if (startswith(a->path, "char-"))
400 whitelist_major(path, a->path + 5, 'c', acc);
401 else
402 log_debug("Ignoring device %s while writing cgroup attribute.", a->path);
403 }
404 }
405 }
406
407 CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
408 CGroupControllerMask mask = 0;
409
410 /* Figure out which controllers we need */
411
412 if (c->cpu_accounting || c->cpu_shares != 1024)
413 mask |= CGROUP_CPUACCT | CGROUP_CPU;
414
415 if (c->blockio_accounting ||
416 c->blockio_weight != 1000 ||
417 c->blockio_device_weights ||
418 c->blockio_device_bandwidths)
419 mask |= CGROUP_BLKIO;
420
421 if (c->memory_accounting ||
422 c->memory_limit != (uint64_t) -1)
423 mask |= CGROUP_MEMORY;
424
425 if (c->device_allow || c->device_policy != CGROUP_AUTO)
426 mask |= CGROUP_DEVICE;
427
428 return mask;
429 }
430
431 CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
432 CGroupContext *c;
433
434 c = unit_get_cgroup_context(u);
435 if (!c)
436 return 0;
437
438 return cgroup_context_get_mask(c);
439 }
440
441 CGroupControllerMask unit_get_members_mask(Unit *u) {
442 assert(u);
443
444 if (u->cgroup_members_mask_valid)
445 return u->cgroup_members_mask;
446
447 u->cgroup_members_mask = 0;
448
449 if (u->type == UNIT_SLICE) {
450 Unit *member;
451 Iterator i;
452
453 SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {
454
455 if (member == u)
456 continue;
457
458 if (UNIT_DEREF(member->slice) != u)
459 continue;
460
461 u->cgroup_members_mask |=
462 unit_get_cgroup_mask(member) |
463 unit_get_members_mask(member);
464 }
465 }
466
467 u->cgroup_members_mask_valid = true;
468 return u->cgroup_members_mask;
469 }
470
471 CGroupControllerMask unit_get_siblings_mask(Unit *u) {
472 CGroupControllerMask m;
473
474 assert(u);
475
476 if (UNIT_ISSET(u->slice))
477 m = unit_get_members_mask(UNIT_DEREF(u->slice));
478 else
479 m = unit_get_cgroup_mask(u) | unit_get_members_mask(u);
480
481 /* Sibling propagation is only relevant for weight-based
482 * controllers, so let's mask out everything else */
483 return m & (CGROUP_CPU|CGROUP_BLKIO|CGROUP_CPUACCT);
484 }
485
486 CGroupControllerMask unit_get_target_mask(Unit *u) {
487 CGroupControllerMask mask;
488
489 mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
490 mask &= u->manager->cgroup_supported;
491
492 return mask;
493 }
494
495 /* Recurse from a unit up through its containing slices, propagating
496 * mask bits upward. A unit is also member of itself. */
497 void unit_update_cgroup_members_masks(Unit *u) {
498 CGroupControllerMask m;
499 bool more;
500
501 assert(u);
502
503 /* Calculate subtree mask */
504 m = unit_get_cgroup_mask(u) | unit_get_members_mask(u);
505
506 /* See if anything changed from the previous invocation. If
507 * not, we're done. */
508 if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask)
509 return;
510
511 more =
512 u->cgroup_subtree_mask_valid &&
513 ((m & ~u->cgroup_subtree_mask) != 0) &&
514 ((~m & u->cgroup_subtree_mask) == 0);
515
516 u->cgroup_subtree_mask = m;
517 u->cgroup_subtree_mask_valid = true;
518
519 if (UNIT_ISSET(u->slice)) {
520 Unit *s = UNIT_DEREF(u->slice);
521
522 if (more)
523 /* There's more set now than before. We
524 * propagate the new mask to the parent's mask
525 * (not caring if it actually was valid or
526 * not). */
527
528 s->cgroup_members_mask |= m;
529
530 else
531 /* There's less set now than before (or we
532 * don't know), we need to recalculate
533 * everything, so let's invalidate the
534 * parent's members mask */
535
536 s->cgroup_members_mask_valid = false;
537
538 /* And now make sure that this change also hits our
539 * grandparents */
540 unit_update_cgroup_members_masks(s);
541 }
542 }
543
544 static const char *migrate_callback(CGroupControllerMask mask, void *userdata) {
545 Unit *u = userdata;
546
547 assert(mask != 0);
548 assert(u);
549
550 while (u) {
551 if (u->cgroup_path &&
552 u->cgroup_realized &&
553 (u->cgroup_realized_mask & mask) == mask)
554 return u->cgroup_path;
555
556 u = UNIT_DEREF(u->slice);
557 }
558
559 return NULL;
560 }
561
562 static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
563 _cleanup_free_ char *path = NULL;
564 int r;
565
566 assert(u);
567
568 path = unit_default_cgroup_path(u);
569 if (!path)
570 return log_oom();
571
572 r = hashmap_put(u->manager->cgroup_unit, path, u);
573 if (r < 0) {
574 log_error(r == -EEXIST ? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path, strerror(-r));
575 return r;
576 }
577 if (r > 0) {
578 u->cgroup_path = path;
579 path = NULL;
580 }
581
582 /* First, create our own group */
583 r = cg_create_everywhere(u->manager->cgroup_supported, mask, u->cgroup_path);
584 if (r < 0) {
585 log_error("Failed to create cgroup %s: %s", u->cgroup_path, strerror(-r));
586 return r;
587 }
588
589 /* Keep track that this is now realized */
590 u->cgroup_realized = true;
591 u->cgroup_realized_mask = mask;
592
593 /* Then, possibly move things over */
594 r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);
595 if (r < 0)
596 log_warning("Failed to migrate cgroup from to %s: %s", u->cgroup_path, strerror(-r));
597
598 return 0;
599 }
600
601 static bool unit_has_mask_realized(Unit *u, CGroupControllerMask mask) {
602 assert(u);
603
604 return u->cgroup_realized && u->cgroup_realized_mask == mask;
605 }
606
607 /* Check if necessary controllers and attributes for a unit are in place.
608 *
609 * If so, do nothing.
610 * If not, create paths, move processes over, and set attributes.
611 *
612 * Returns 0 on success and < 0 on failure. */
613 static int unit_realize_cgroup_now(Unit *u) {
614 CGroupControllerMask mask;
615 int r;
616
617 assert(u);
618
619 if (u->in_cgroup_queue) {
620 LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u);
621 u->in_cgroup_queue = false;
622 }
623
624 mask = unit_get_target_mask(u);
625
626 if (unit_has_mask_realized(u, mask))
627 return 0;
628
629 /* First, realize parents */
630 if (UNIT_ISSET(u->slice)) {
631 r = unit_realize_cgroup_now(UNIT_DEREF(u->slice));
632 if (r < 0)
633 return r;
634 }
635
636 /* And then do the real work */
637 r = unit_create_cgroups(u, mask);
638 if (r < 0)
639 return r;
640
641 /* Finally, apply the necessary attributes. */
642 cgroup_context_apply(unit_get_cgroup_context(u), mask, u->cgroup_path);
643
644 return 0;
645 }
646
647 static void unit_add_to_cgroup_queue(Unit *u) {
648
649 if (u->in_cgroup_queue)
650 return;
651
652 LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u);
653 u->in_cgroup_queue = true;
654 }
655
656 unsigned manager_dispatch_cgroup_queue(Manager *m) {
657 Unit *i;
658 unsigned n = 0;
659 int r;
660
661 while ((i = m->cgroup_queue)) {
662 assert(i->in_cgroup_queue);
663
664 r = unit_realize_cgroup_now(i);
665 if (r < 0)
666 log_warning("Failed to realize cgroups for queued unit %s: %s", i->id, strerror(-r));
667
668 n++;
669 }
670
671 return n;
672 }
673
674 static void unit_queue_siblings(Unit *u) {
675 Unit *slice;
676
677 /* This adds the siblings of the specified unit and the
678 * siblings of all parent units to the cgroup queue. (But
679 * neither the specified unit itself nor the parents.) */
680
681 while ((slice = UNIT_DEREF(u->slice))) {
682 Iterator i;
683 Unit *m;
684
685 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
686 if (m == u)
687 continue;
688
689 /* Skip units that have a dependency on the slice
690 * but aren't actually in it. */
691 if (UNIT_DEREF(m->slice) != slice)
692 continue;
693
694 /* No point in doing cgroup application for units
695 * without active processes. */
696 if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
697 continue;
698
699 /* If the unit doesn't need any new controllers
700 * and has current ones realized, it doesn't need
701 * any changes. */
702 if (unit_has_mask_realized(m, unit_get_target_mask(m)))
703 continue;
704
705 unit_add_to_cgroup_queue(m);
706 }
707
708 u = slice;
709 }
710 }
711
712 int unit_realize_cgroup(Unit *u) {
713 CGroupContext *c;
714
715 assert(u);
716
717 c = unit_get_cgroup_context(u);
718 if (!c)
719 return 0;
720
721 /* So, here's the deal: when realizing the cgroups for this
722 * unit, we need to first create all parents, but there's more
723 * actually: for the weight-based controllers we also need to
724 * make sure that all our siblings (i.e. units that are in the
725 * same slice as we are) have cgroups, too. Otherwise, things
726 * would become very uneven as each of their processes would
727 * get as much resources as all our group together. This call
728 * will synchronously create the parent cgroups, but will
729 * defer work on the siblings to the next event loop
730 * iteration. */
731
732 /* Add all sibling slices to the cgroup queue. */
733 unit_queue_siblings(u);
734
735 /* And realize this one now (and apply the values) */
736 return unit_realize_cgroup_now(u);
737 }
738
739 void unit_destroy_cgroup(Unit *u) {
740 int r;
741
742 assert(u);
743
744 if (!u->cgroup_path)
745 return;
746
747 r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE));
748 if (r < 0)
749 log_debug("Failed to destroy cgroup %s: %s", u->cgroup_path, strerror(-r));
750
751 hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
752
753 free(u->cgroup_path);
754 u->cgroup_path = NULL;
755 u->cgroup_realized = false;
756 u->cgroup_realized_mask = 0;
757
758 }
759
760 pid_t unit_search_main_pid(Unit *u) {
761 _cleanup_fclose_ FILE *f = NULL;
762 pid_t pid = 0, npid, mypid;
763
764 assert(u);
765
766 if (!u->cgroup_path)
767 return 0;
768
769 if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
770 return 0;
771
772 mypid = getpid();
773 while (cg_read_pid(f, &npid) > 0) {
774 pid_t ppid;
775
776 if (npid == pid)
777 continue;
778
779 /* Ignore processes that aren't our kids */
780 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
781 continue;
782
783 if (pid != 0) {
784 /* Dang, there's more than one daemonized PID
785 in this group, so we don't know what process
786 is the main process. */
787 pid = 0;
788 break;
789 }
790
791 pid = npid;
792 }
793
794 return pid;
795 }
796
797 int manager_setup_cgroup(Manager *m) {
798 _cleanup_free_ char *path = NULL;
799 char *e;
800 int r;
801
802 assert(m);
803
804 /* 0. Be nice to Ingo Molnar #628004 */
805 if (path_is_mount_point("/sys/fs/cgroup/systemd", false) <= 0) {
806 log_warning("No control group support available, not creating root group.");
807 return 0;
808 }
809
810 /* 1. Determine hierarchy */
811 free(m->cgroup_root);
812 m->cgroup_root = NULL;
813
814 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
815 if (r < 0) {
816 log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
817 return r;
818 }
819
820 /* LEGACY: Already in /system.slice? If so, let's cut this
821 * off. This is to support live upgrades from older systemd
822 * versions where PID 1 was moved there. */
823 if (m->running_as == SYSTEMD_SYSTEM) {
824 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
825 if (!e)
826 e = endswith(m->cgroup_root, "/system");
827 if (e)
828 *e = 0;
829 }
830
831 /* And make sure to store away the root value without trailing
832 * slash, even for the root dir, so that we can easily prepend
833 * it everywhere. */
834 if (streq(m->cgroup_root, "/"))
835 m->cgroup_root[0] = 0;
836
837 /* 2. Show data */
838 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
839 if (r < 0) {
840 log_error("Cannot find cgroup mount point: %s", strerror(-r));
841 return r;
842 }
843
844 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
845
846 /* 3. Install agent */
847 if (m->running_as == SYSTEMD_SYSTEM) {
848 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
849 if (r < 0)
850 log_warning("Failed to install release agent, ignoring: %s", strerror(-r));
851 else if (r > 0)
852 log_debug("Installed release agent.");
853 else
854 log_debug("Release agent already installed.");
855 }
856
857 /* 4. Make sure we are in the root cgroup */
858 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
859 if (r < 0) {
860 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r));
861 return r;
862 }
863
864 /* 5. And pin it, so that it cannot be unmounted */
865 safe_close(m->pin_cgroupfs_fd);
866
867 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
868 if (r < 0) {
869 log_error("Failed to open pin file: %m");
870 return -errno;
871 }
872
873 /* 6. Figure out which controllers are supported */
874 m->cgroup_supported = cg_mask_supported();
875
876 /* 7. Always enable hierarchial support if it exists... */
877 cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
878
879 return 0;
880 }
881
882 void manager_shutdown_cgroup(Manager *m, bool delete) {
883 assert(m);
884
885 /* We can't really delete the group, since we are in it. But
886 * let's trim it. */
887 if (delete && m->cgroup_root)
888 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
889
890 m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd);
891
892 free(m->cgroup_root);
893 m->cgroup_root = NULL;
894 }
895
896 Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
897 char *p;
898 Unit *u;
899
900 assert(m);
901 assert(cgroup);
902
903 u = hashmap_get(m->cgroup_unit, cgroup);
904 if (u)
905 return u;
906
907 p = strdupa(cgroup);
908 for (;;) {
909 char *e;
910
911 e = strrchr(p, '/');
912 if (e == p || !e)
913 return NULL;
914
915 *e = 0;
916
917 u = hashmap_get(m->cgroup_unit, p);
918 if (u)
919 return u;
920 }
921 }
922
923 Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
924 _cleanup_free_ char *cgroup = NULL;
925 int r;
926
927 assert(m);
928
929 if (pid <= 1)
930 return NULL;
931
932 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
933 if (r < 0)
934 return NULL;
935
936 return manager_get_unit_by_cgroup(m, cgroup);
937 }
938
939 int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
940 Unit *u;
941 int r;
942
943 assert(m);
944 assert(cgroup);
945
946 u = manager_get_unit_by_cgroup(m, cgroup);
947 if (u) {
948 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
949 if (r > 0) {
950 if (UNIT_VTABLE(u)->notify_cgroup_empty)
951 UNIT_VTABLE(u)->notify_cgroup_empty(u);
952
953 unit_add_to_gc_queue(u);
954 }
955 }
956
957 return 0;
958 }
959
960 static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
961 [CGROUP_AUTO] = "auto",
962 [CGROUP_CLOSED] = "closed",
963 [CGROUP_STRICT] = "strict",
964 };
965
966 DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);