]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/cgroup.c
udate TODO
[thirdparty/systemd.git] / src / core / cgroup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
4ad49000 6 Copyright 2013 Lennart Poettering
8e274523
LP
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
c6c18be3 22#include <fcntl.h>
e41969e3 23#include <fnmatch.h>
8c6db833 24
9eb977db 25#include "path-util.h"
9444b1f2 26#include "special.h"
4ad49000
LP
27#include "cgroup-util.h"
28#include "cgroup.h"
8e274523 29
4ad49000
LP
30void cgroup_context_init(CGroupContext *c) {
31 assert(c);
32
33 /* Initialize everything to the kernel defaults, assuming the
34 * structure is preinitialized to 0 */
35
36 c->cpu_shares = 1024;
ddca82ac 37 c->memory_limit = (uint64_t) -1;
4ad49000
LP
38 c->blockio_weight = 1000;
39}
8e274523 40
4ad49000
LP
41void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
42 assert(c);
43 assert(a);
44
71fda00f 45 LIST_REMOVE(device_allow, c->device_allow, a);
4ad49000
LP
46 free(a->path);
47 free(a);
48}
49
50void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
51 assert(c);
52 assert(w);
53
71fda00f 54 LIST_REMOVE(device_weights, c->blockio_device_weights, w);
4ad49000
LP
55 free(w->path);
56 free(w);
57}
58
59void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
60 assert(c);
8e274523 61 assert(b);
8e274523 62
71fda00f 63 LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
4ad49000
LP
64 free(b->path);
65 free(b);
66}
67
68void cgroup_context_done(CGroupContext *c) {
69 assert(c);
70
71 while (c->blockio_device_weights)
72 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
73
74 while (c->blockio_device_bandwidths)
75 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
76
77 while (c->device_allow)
78 cgroup_context_free_device_allow(c, c->device_allow);
79}
80
81void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
82 CGroupBlockIODeviceBandwidth *b;
83 CGroupBlockIODeviceWeight *w;
84 CGroupDeviceAllow *a;
85
86 assert(c);
87 assert(f);
88
89 prefix = strempty(prefix);
90
91 fprintf(f,
92 "%sCPUAccounting=%s\n"
93 "%sBlockIOAccounting=%s\n"
94 "%sMemoryAccounting=%s\n"
95 "%sCPUShares=%lu\n"
112a7f46 96 "%sBlockIOWeight=%lu\n"
4ad49000 97 "%sMemoryLimit=%" PRIu64 "\n"
4ad49000
LP
98 "%sDevicePolicy=%s\n",
99 prefix, yes_no(c->cpu_accounting),
100 prefix, yes_no(c->blockio_accounting),
101 prefix, yes_no(c->memory_accounting),
102 prefix, c->cpu_shares,
103 prefix, c->blockio_weight,
104 prefix, c->memory_limit,
4ad49000
LP
105 prefix, cgroup_device_policy_to_string(c->device_policy));
106
107 LIST_FOREACH(device_allow, a, c->device_allow)
108 fprintf(f,
109 "%sDeviceAllow=%s %s%s%s\n",
110 prefix,
111 a->path,
112 a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
113
114 LIST_FOREACH(device_weights, w, c->blockio_device_weights)
115 fprintf(f,
8e7076ca 116 "%sBlockIODeviceWeight=%s %lu",
4ad49000
LP
117 prefix,
118 w->path,
119 w->weight);
120
121 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
122 char buf[FORMAT_BYTES_MAX];
123
124 fprintf(f,
125 "%s%s=%s %s\n",
126 prefix,
127 b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
128 b->path,
129 format_bytes(buf, sizeof(buf), b->bandwidth));
130 }
131}
132
/* Resolve the path p to a block device number and store it in *dev.
 *
 * If p is a block device node its own device number is used. If not,
 * the device the file is stored on is used, and an attempt is made to
 * replace it by the whole disk it belongs to.
 *
 * Returns 0 on success, -errno if stat() fails, and -ENODEV if no
 * backing block device can be determined. */
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;

        assert(p);
        assert(dev);

        if (stat(p, &st) < 0) {
                log_warning("Couldn't stat device %s: %m", p);
                return -errno;
        }

        if (S_ISBLK(st.st_mode)) {
                *dev = st.st_rdev;
                return 0;
        }

        if (major(st.st_dev) == 0) {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        /* Not a device node, hence use the block device this file
         * is stored on */
        *dev = st.st_dev;

        /* If that is a partition, try to get the originating block
         * device. The result is deliberately not checked. */
        block_get_whole_disk(*dev, dev);

        return 0;
}
163
4ad49000
LP
164static int whitelist_device(const char *path, const char *node, const char *acc) {
165 char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
166 struct stat st;
8c6db833 167 int r;
8e274523 168
4ad49000
LP
169 assert(path);
170 assert(acc);
8e274523 171
4ad49000
LP
172 if (stat(node, &st) < 0) {
173 log_warning("Couldn't stat device %s", node);
174 return -errno;
175 }
176
177 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
178 log_warning("%s is not a device.", node);
179 return -ENODEV;
180 }
181
182 sprintf(buf,
183 "%c %u:%u %s",
184 S_ISCHR(st.st_mode) ? 'c' : 'b',
185 major(st.st_rdev), minor(st.st_rdev),
186 acc);
187
188 r = cg_set_attribute("devices", path, "devices.allow", buf);
189 if (r < 0)
190 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
191
192 return r;
8e274523
LP
193}
194
90060676
LP
195static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
196 _cleanup_fclose_ FILE *f = NULL;
197 char line[LINE_MAX];
198 bool good = false;
199 int r;
200
201 assert(path);
202 assert(acc);
203 assert(type == 'b' || type == 'c');
204
205 f = fopen("/proc/devices", "re");
206 if (!f) {
207 log_warning("Cannot open /proc/devices to resolve %s (%c): %m", name, type);
208 return -errno;
209 }
210
211 FOREACH_LINE(line, f, goto fail) {
212 char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
213 unsigned maj;
214
215 truncate_nl(line);
216
217 if (type == 'c' && streq(line, "Character devices:")) {
218 good = true;
219 continue;
220 }
221
222 if (type == 'b' && streq(line, "Block devices:")) {
223 good = true;
224 continue;
225 }
226
227 if (isempty(line)) {
228 good = false;
229 continue;
230 }
231
232 if (!good)
233 continue;
234
235 p = strstrip(line);
236
237 w = strpbrk(p, WHITESPACE);
238 if (!w)
239 continue;
240 *w = 0;
241
242 r = safe_atou(p, &maj);
243 if (r < 0)
244 continue;
245 if (maj <= 0)
246 continue;
247
248 w++;
249 w += strspn(w, WHITESPACE);
e41969e3
LP
250
251 if (fnmatch(name, w, 0) != 0)
90060676
LP
252 continue;
253
254 sprintf(buf,
255 "%c %u:* %s",
256 type,
257 maj,
258 acc);
259
260 r = cg_set_attribute("devices", path, "devices.allow", buf);
261 if (r < 0)
262 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
263 }
264
265 return 0;
266
267fail:
268 log_warning("Failed to read /proc/devices: %m");
269 return -errno;
270}
271
4ad49000 272void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path) {
01efdf13 273 bool is_root;
4ad49000
LP
274 int r;
275
276 assert(c);
277 assert(path);
8e274523 278
4ad49000
LP
279 if (mask == 0)
280 return;
8e274523 281
01efdf13
LP
282 /* Some cgroup attributes are not support on the root cgroup,
283 * hence silently ignore */
284 is_root = isempty(path) || path_equal(path, "/");
285
286 if ((mask & CGROUP_CPU) && !is_root) {
4ad49000 287 char buf[DECIMAL_STR_MAX(unsigned long) + 1];
8e274523 288
4ad49000
LP
289 sprintf(buf, "%lu\n", c->cpu_shares);
290 r = cg_set_attribute("cpu", path, "cpu.shares", buf);
291 if (r < 0)
292 log_warning("Failed to set cpu.shares on %s: %s", path, strerror(-r));
293 }
294
295 if (mask & CGROUP_BLKIO) {
296 char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
297 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
298 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
299 CGroupBlockIODeviceWeight *w;
300 CGroupBlockIODeviceBandwidth *b;
301
01efdf13
LP
302 if (!is_root) {
303 sprintf(buf, "%lu\n", c->blockio_weight);
304 r = cg_set_attribute("blkio", path, "blkio.weight", buf);
305 if (r < 0)
306 log_warning("Failed to set blkio.weight on %s: %s", path, strerror(-r));
4ad49000 307
01efdf13
LP
308 /* FIXME: no way to reset this list */
309 LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
310 dev_t dev;
4ad49000 311
01efdf13
LP
312 r = lookup_blkio_device(w->path, &dev);
313 if (r < 0)
314 continue;
8e274523 315
01efdf13
LP
316 sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
317 r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
318 if (r < 0)
319 log_error("Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
320 }
4ad49000
LP
321 }
322
323 /* FIXME: no way to reset this list */
324 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
325 const char *a;
326 dev_t dev;
327
328 r = lookup_blkio_device(b->path, &dev);
329 if (r < 0)
330 continue;
331
332 a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
333
334 sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
335 r = cg_set_attribute("blkio", path, a, buf);
336 if (r < 0)
337 log_error("Failed to set %s on %s: %s", a, path, strerror(-r));
d686d8a9 338 }
8e274523
LP
339 }
340
4ad49000 341 if (mask & CGROUP_MEMORY) {
6a94f2e9 342 if (c->memory_limit != (uint64_t) -1) {
e58cec11
LP
343 char buf[DECIMAL_STR_MAX(uint64_t) + 1];
344
6a94f2e9
G
345 sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
346 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
347 } else
348 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
8e274523 349
4ad49000
LP
350 if (r < 0)
351 log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
4ad49000 352 }
8e274523 353
01efdf13 354 if ((mask & CGROUP_DEVICE) && !is_root) {
4ad49000 355 CGroupDeviceAllow *a;
8e274523 356
4ad49000
LP
357 if (c->device_allow || c->device_policy != CGROUP_AUTO)
358 r = cg_set_attribute("devices", path, "devices.deny", "a");
359 else
360 r = cg_set_attribute("devices", path, "devices.allow", "a");
361 if (r < 0)
01efdf13 362 log_warning("Failed to reset devices.list on %s: %s", path, strerror(-r));
fb385181 363
4ad49000
LP
364 if (c->device_policy == CGROUP_CLOSED ||
365 (c->device_policy == CGROUP_AUTO && c->device_allow)) {
366 static const char auto_devices[] =
367 "/dev/null\0" "rw\0"
368 "/dev/zero\0" "rw\0"
369 "/dev/full\0" "rw\0"
370 "/dev/random\0" "rw\0"
371 "/dev/urandom\0" "rw\0";
372
373 const char *x, *y;
374
375 NULSTR_FOREACH_PAIR(x, y, auto_devices)
376 whitelist_device(path, x, y);
377 }
378
379 LIST_FOREACH(device_allow, a, c->device_allow) {
380 char acc[4];
381 unsigned k = 0;
382
383 if (a->r)
384 acc[k++] = 'r';
385 if (a->w)
386 acc[k++] = 'w';
387 if (a->m)
388 acc[k++] = 'm';
fb385181 389
4ad49000
LP
390 if (k == 0)
391 continue;
fb385181 392
4ad49000 393 acc[k++] = 0;
90060676
LP
394
395 if (startswith(a->path, "/dev/"))
396 whitelist_device(path, a->path, acc);
397 else if (startswith(a->path, "block-"))
398 whitelist_major(path, a->path + 6, 'b', acc);
399 else if (startswith(a->path, "char-"))
400 whitelist_major(path, a->path + 5, 'c', acc);
401 else
402 log_debug("Ignoring device %s while writing cgroup attribute.", a->path);
4ad49000
LP
403 }
404 }
fb385181
LP
405}
406
4ad49000
LP
407CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
408 CGroupControllerMask mask = 0;
8e274523 409
4ad49000 410 /* Figure out which controllers we need */
8e274523 411
4ad49000
LP
412 if (c->cpu_accounting || c->cpu_shares != 1024)
413 mask |= CGROUP_CPUACCT | CGROUP_CPU;
ecedd90f 414
4ad49000
LP
415 if (c->blockio_accounting ||
416 c->blockio_weight != 1000 ||
417 c->blockio_device_weights ||
418 c->blockio_device_bandwidths)
419 mask |= CGROUP_BLKIO;
ecedd90f 420
4ad49000 421 if (c->memory_accounting ||
ddca82ac 422 c->memory_limit != (uint64_t) -1)
4ad49000 423 mask |= CGROUP_MEMORY;
8e274523 424
4ad49000
LP
425 if (c->device_allow || c->device_policy != CGROUP_AUTO)
426 mask |= CGROUP_DEVICE;
427
428 return mask;
8e274523
LP
429}
430
bc432dc7 431CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
4ad49000 432 CGroupContext *c;
8e274523 433
4ad49000
LP
434 c = unit_get_cgroup_context(u);
435 if (!c)
436 return 0;
8e274523 437
4ad49000 438 return cgroup_context_get_mask(c);
8e274523
LP
439}
440
bc432dc7 441CGroupControllerMask unit_get_members_mask(Unit *u) {
4ad49000 442 assert(u);
bc432dc7
LP
443
444 if (u->cgroup_members_mask_valid)
445 return u->cgroup_members_mask;
446
447 u->cgroup_members_mask = 0;
448
449 if (u->type == UNIT_SLICE) {
450 Unit *member;
451 Iterator i;
452
453 SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {
454
455 if (member == u)
456 continue;
457
d4fdc205 458 if (UNIT_DEREF(member->slice) != u)
bc432dc7
LP
459 continue;
460
461 u->cgroup_members_mask |=
462 unit_get_cgroup_mask(member) |
463 unit_get_members_mask(member);
464 }
465 }
466
467 u->cgroup_members_mask_valid = true;
6414b7c9 468 return u->cgroup_members_mask;
246aa6dd
LP
469}
470
bc432dc7
LP
471CGroupControllerMask unit_get_siblings_mask(Unit *u) {
472 CGroupControllerMask m;
473
4ad49000 474 assert(u);
246aa6dd 475
bc432dc7
LP
476 if (UNIT_ISSET(u->slice))
477 m = unit_get_members_mask(UNIT_DEREF(u->slice));
478 else
479 m = unit_get_cgroup_mask(u) | unit_get_members_mask(u);
4ad49000
LP
480
481 /* Sibling propagation is only relevant for weight-based
482 * controllers, so let's mask out everything else */
bc432dc7 483 return m & (CGROUP_CPU|CGROUP_BLKIO|CGROUP_CPUACCT);
246aa6dd
LP
484}
485
bc432dc7 486CGroupControllerMask unit_get_target_mask(Unit *u) {
6414b7c9
DS
487 CGroupControllerMask mask;
488
489 mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
490 mask &= u->manager->cgroup_supported;
491
492 return mask;
493}
494
495/* Recurse from a unit up through its containing slices, propagating
496 * mask bits upward. A unit is also member of itself. */
bc432dc7
LP
497void unit_update_cgroup_members_masks(Unit *u) {
498 CGroupControllerMask m;
499 bool more;
500
501 assert(u);
502
503 /* Calculate subtree mask */
504 m = unit_get_cgroup_mask(u) | unit_get_members_mask(u);
505
506 /* See if anything changed from the previous invocation. If
507 * not, we're done. */
508 if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask)
509 return;
510
511 more =
512 u->cgroup_subtree_mask_valid &&
513 ((m & ~u->cgroup_subtree_mask) != 0) &&
514 ((~m & u->cgroup_subtree_mask) == 0);
515
516 u->cgroup_subtree_mask = m;
517 u->cgroup_subtree_mask_valid = true;
518
6414b7c9
DS
519 if (UNIT_ISSET(u->slice)) {
520 Unit *s = UNIT_DEREF(u->slice);
bc432dc7
LP
521
522 if (more)
523 /* There's more set now than before. We
524 * propagate the new mask to the parent's mask
525 * (not caring if it actually was valid or
526 * not). */
527
528 s->cgroup_members_mask |= m;
529
530 else
531 /* There's less set now than before (or we
532 * don't know), we need to recalculate
533 * everything, so let's invalidate the
534 * parent's members mask */
535
536 s->cgroup_members_mask_valid = false;
537
538 /* And now make sure that this change also hits our
539 * grandparents */
540 unit_update_cgroup_members_masks(s);
6414b7c9
DS
541 }
542}
543
03b90d4b
LP
544static const char *migrate_callback(CGroupControllerMask mask, void *userdata) {
545 Unit *u = userdata;
546
547 assert(mask != 0);
548 assert(u);
549
550 while (u) {
551 if (u->cgroup_path &&
552 u->cgroup_realized &&
553 (u->cgroup_realized_mask & mask) == mask)
554 return u->cgroup_path;
555
556 u = UNIT_DEREF(u->slice);
557 }
558
559 return NULL;
560}
561
4ad49000 562static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
03b90d4b 563 _cleanup_free_ char *path = NULL;
bc432dc7 564 int r;
64747e2d 565
4ad49000 566 assert(u);
64747e2d 567
4ad49000
LP
568 path = unit_default_cgroup_path(u);
569 if (!path)
a94042fa 570 return log_oom();
64747e2d 571
0a1eb06d 572 r = hashmap_put(u->manager->cgroup_unit, path, u);
03b90d4b
LP
573 if (r < 0) {
574 log_error(r == -EEXIST ? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path, strerror(-r));
0a1eb06d 575 return r;
b58b8e11 576 }
03b90d4b 577 if (r > 0) {
b58b8e11 578 u->cgroup_path = path;
a94042fa 579 path = NULL;
b58b8e11
HH
580 }
581
03b90d4b
LP
582 /* First, create our own group */
583 r = cg_create_everywhere(u->manager->cgroup_supported, mask, u->cgroup_path);
584 if (r < 0) {
585 log_error("Failed to create cgroup %s: %s", u->cgroup_path, strerror(-r));
586 return r;
587 }
588
589 /* Keep track that this is now realized */
4ad49000 590 u->cgroup_realized = true;
bc432dc7 591 u->cgroup_realized_mask = mask;
4ad49000 592
03b90d4b
LP
593 /* Then, possibly move things over */
594 r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);
595 if (r < 0)
596 log_warning("Failed to migrate cgroup from to %s: %s", u->cgroup_path, strerror(-r));
597
64747e2d
LP
598 return 0;
599}
600
6414b7c9 601static bool unit_has_mask_realized(Unit *u, CGroupControllerMask mask) {
bc432dc7
LP
602 assert(u);
603
604 return u->cgroup_realized && u->cgroup_realized_mask == mask;
6414b7c9
DS
605}
606
607/* Check if necessary controllers and attributes for a unit are in place.
608 *
609 * If so, do nothing.
610 * If not, create paths, move processes over, and set attributes.
611 *
612 * Returns 0 on success and < 0 on failure. */
0a1eb06d 613static int unit_realize_cgroup_now(Unit *u) {
4ad49000 614 CGroupControllerMask mask;
6414b7c9 615 int r;
64747e2d 616
4ad49000 617 assert(u);
64747e2d 618
4ad49000 619 if (u->in_cgroup_queue) {
71fda00f 620 LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
621 u->in_cgroup_queue = false;
622 }
64747e2d 623
6414b7c9 624 mask = unit_get_target_mask(u);
64747e2d 625
6414b7c9 626 if (unit_has_mask_realized(u, mask))
0a1eb06d 627 return 0;
64747e2d 628
4ad49000 629 /* First, realize parents */
6414b7c9
DS
630 if (UNIT_ISSET(u->slice)) {
631 r = unit_realize_cgroup_now(UNIT_DEREF(u->slice));
632 if (r < 0)
633 return r;
634 }
4ad49000
LP
635
636 /* And then do the real work */
6414b7c9
DS
637 r = unit_create_cgroups(u, mask);
638 if (r < 0)
639 return r;
640
641 /* Finally, apply the necessary attributes. */
642 cgroup_context_apply(unit_get_cgroup_context(u), mask, u->cgroup_path);
643
644 return 0;
64747e2d
LP
645}
646
4ad49000 647static void unit_add_to_cgroup_queue(Unit *u) {
ecedd90f 648
4ad49000
LP
649 if (u->in_cgroup_queue)
650 return;
8e274523 651
71fda00f 652 LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
653 u->in_cgroup_queue = true;
654}
8c6db833 655
4ad49000
LP
656unsigned manager_dispatch_cgroup_queue(Manager *m) {
657 Unit *i;
658 unsigned n = 0;
6414b7c9 659 int r;
ecedd90f 660
4ad49000
LP
661 while ((i = m->cgroup_queue)) {
662 assert(i->in_cgroup_queue);
ecedd90f 663
6414b7c9
DS
664 r = unit_realize_cgroup_now(i);
665 if (r < 0)
666 log_warning("Failed to realize cgroups for queued unit %s: %s", i->id, strerror(-r));
0a1eb06d 667
4ad49000
LP
668 n++;
669 }
ecedd90f 670
4ad49000 671 return n;
8e274523
LP
672}
673
4ad49000
LP
674static void unit_queue_siblings(Unit *u) {
675 Unit *slice;
ca949c9d 676
4ad49000
LP
677 /* This adds the siblings of the specified unit and the
678 * siblings of all parent units to the cgroup queue. (But
679 * neither the specified unit itself nor the parents.) */
680
681 while ((slice = UNIT_DEREF(u->slice))) {
682 Iterator i;
683 Unit *m;
8f53a7b8 684
4ad49000
LP
685 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
686 if (m == u)
687 continue;
8e274523 688
6414b7c9
DS
689 /* Skip units that have a dependency on the slice
690 * but aren't actually in it. */
4ad49000 691 if (UNIT_DEREF(m->slice) != slice)
50159e6a 692 continue;
8e274523 693
6414b7c9
DS
694 /* No point in doing cgroup application for units
695 * without active processes. */
696 if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
697 continue;
698
699 /* If the unit doesn't need any new controllers
700 * and has current ones realized, it doesn't need
701 * any changes. */
702 if (unit_has_mask_realized(m, unit_get_target_mask(m)))
703 continue;
704
4ad49000 705 unit_add_to_cgroup_queue(m);
50159e6a
LP
706 }
707
4ad49000 708 u = slice;
8e274523 709 }
4ad49000
LP
710}
711
0a1eb06d 712int unit_realize_cgroup(Unit *u) {
4ad49000
LP
713 CGroupContext *c;
714
715 assert(u);
716
717 c = unit_get_cgroup_context(u);
718 if (!c)
0a1eb06d 719 return 0;
8e274523 720
4ad49000
LP
721 /* So, here's the deal: when realizing the cgroups for this
722 * unit, we need to first create all parents, but there's more
723 * actually: for the weight-based controllers we also need to
724 * make sure that all our siblings (i.e. units that are in the
73e231ab 725 * same slice as we are) have cgroups, too. Otherwise, things
4ad49000
LP
726 * would become very uneven as each of their processes would
727 * get as much resources as all our group together. This call
728 * will synchronously create the parent cgroups, but will
729 * defer work on the siblings to the next event loop
730 * iteration. */
ca949c9d 731
4ad49000
LP
732 /* Add all sibling slices to the cgroup queue. */
733 unit_queue_siblings(u);
734
6414b7c9 735 /* And realize this one now (and apply the values) */
bc432dc7 736 return unit_realize_cgroup_now(u);
8e274523
LP
737}
738
4ad49000 739void unit_destroy_cgroup(Unit *u) {
8e274523
LP
740 int r;
741
4ad49000 742 assert(u);
8e274523 743
4ad49000
LP
744 if (!u->cgroup_path)
745 return;
8e274523 746
13b84ec7 747 r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE));
4ad49000 748 if (r < 0)
376dd21d 749 log_debug("Failed to destroy cgroup %s: %s", u->cgroup_path, strerror(-r));
8e274523 750
0a1eb06d
LP
751 hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
752
4ad49000
LP
753 free(u->cgroup_path);
754 u->cgroup_path = NULL;
755 u->cgroup_realized = false;
bc432dc7 756 u->cgroup_realized_mask = 0;
0a1eb06d 757
8e274523
LP
758}
759
4ad49000
LP
760pid_t unit_search_main_pid(Unit *u) {
761 _cleanup_fclose_ FILE *f = NULL;
762 pid_t pid = 0, npid, mypid;
763
764 assert(u);
765
766 if (!u->cgroup_path)
767 return 0;
768
769 if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
770 return 0;
771
772 mypid = getpid();
773 while (cg_read_pid(f, &npid) > 0) {
774 pid_t ppid;
775
776 if (npid == pid)
777 continue;
8e274523 778
4ad49000
LP
779 /* Ignore processes that aren't our kids */
780 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
781 continue;
8e274523 782
4ad49000
LP
783 if (pid != 0) {
784 /* Dang, there's more than one daemonized PID
785 in this group, so we don't know what process
786 is the main process. */
787 pid = 0;
788 break;
789 }
8e274523 790
4ad49000 791 pid = npid;
8e274523
LP
792 }
793
4ad49000 794 return pid;
8e274523
LP
795}
796
8e274523 797int manager_setup_cgroup(Manager *m) {
9444b1f2 798 _cleanup_free_ char *path = NULL;
15c60e99 799 char *e;
8e274523 800 int r;
8e274523
LP
801
802 assert(m);
803
e5a53dc7 804 /* 0. Be nice to Ingo Molnar #628004 */
0c85a4f3 805 if (path_is_mount_point("/sys/fs/cgroup/systemd", false) <= 0) {
e5a53dc7
LP
806 log_warning("No control group support available, not creating root group.");
807 return 0;
808 }
809
35d2e7ec 810 /* 1. Determine hierarchy */
9444b1f2
LP
811 free(m->cgroup_root);
812 m->cgroup_root = NULL;
813
814 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
9156e799 815 if (r < 0) {
12235040 816 log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
a32360f1 817 return r;
12235040 818 }
8e274523 819
15c60e99
LP
820 /* LEGACY: Already in /system.slice? If so, let's cut this
821 * off. This is to support live upgrades from older systemd
822 * versions where PID 1 was moved there. */
9444b1f2
LP
823 if (m->running_as == SYSTEMD_SYSTEM) {
824 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
15c60e99
LP
825 if (!e)
826 e = endswith(m->cgroup_root, "/system");
9444b1f2
LP
827 if (e)
828 *e = 0;
0baf24dd 829 }
7ccfb64a 830
9444b1f2
LP
831 /* And make sure to store away the root value without trailing
832 * slash, even for the root dir, so that we can easily prepend
833 * it everywhere. */
834 if (streq(m->cgroup_root, "/"))
835 m->cgroup_root[0] = 0;
8e274523 836
35d2e7ec 837 /* 2. Show data */
9444b1f2 838 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
3474ae3c 839 if (r < 0) {
12235040 840 log_error("Cannot find cgroup mount point: %s", strerror(-r));
a32360f1 841 return r;
12235040 842 }
8e274523 843
c6c18be3
LP
844 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
845
35d2e7ec 846 /* 3. Install agent */
a32360f1
LP
847 if (m->running_as == SYSTEMD_SYSTEM) {
848 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
849 if (r < 0)
850 log_warning("Failed to install release agent, ignoring: %s", strerror(-r));
851 else if (r > 0)
852 log_debug("Installed release agent.");
853 else
854 log_debug("Release agent already installed.");
855 }
8e274523 856
15c60e99
LP
857 /* 4. Make sure we are in the root cgroup */
858 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
9156e799 859 if (r < 0) {
8e274523 860 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r));
a32360f1 861 return r;
c6c18be3
LP
862 }
863
35d2e7ec 864 /* 5. And pin it, so that it cannot be unmounted */
c6c18be3
LP
865 if (m->pin_cgroupfs_fd >= 0)
866 close_nointr_nofail(m->pin_cgroupfs_fd);
867
9156e799
LP
868 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
869 if (r < 0) {
12235040 870 log_error("Failed to open pin file: %m");
a32360f1 871 return -errno;
c6c18be3
LP
872 }
873
4ad49000
LP
874 /* 6. Figure out which controllers are supported */
875 m->cgroup_supported = cg_mask_supported();
9156e799 876
e58cec11
LP
877 /* 7. Always enable hierarchial support if it exists... */
878 cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
879
a32360f1 880 return 0;
8e274523
LP
881}
882
c6c18be3 883void manager_shutdown_cgroup(Manager *m, bool delete) {
8e274523
LP
884 assert(m);
885
9444b1f2
LP
886 /* We can't really delete the group, since we are in it. But
887 * let's trim it. */
888 if (delete && m->cgroup_root)
889 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
8e274523 890
c6c18be3
LP
891 if (m->pin_cgroupfs_fd >= 0) {
892 close_nointr_nofail(m->pin_cgroupfs_fd);
893 m->pin_cgroupfs_fd = -1;
894 }
895
9444b1f2
LP
896 free(m->cgroup_root);
897 m->cgroup_root = NULL;
8e274523
LP
898}
899
4ad49000 900Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
acb14d31 901 char *p;
4ad49000 902 Unit *u;
acb14d31
LP
903
904 assert(m);
905 assert(cgroup);
acb14d31 906
4ad49000
LP
907 u = hashmap_get(m->cgroup_unit, cgroup);
908 if (u)
909 return u;
acb14d31 910
8e70580b 911 p = strdupa(cgroup);
acb14d31
LP
912 for (;;) {
913 char *e;
914
915 e = strrchr(p, '/');
4ad49000
LP
916 if (e == p || !e)
917 return NULL;
acb14d31
LP
918
919 *e = 0;
920
4ad49000
LP
921 u = hashmap_get(m->cgroup_unit, p);
922 if (u)
923 return u;
acb14d31
LP
924 }
925}
926
4ad49000
LP
927Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
928 _cleanup_free_ char *cgroup = NULL;
acb14d31 929 int r;
8e274523 930
8c47c732
LP
931 assert(m);
932
933 if (pid <= 1)
934 return NULL;
935
4ad49000
LP
936 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
937 if (r < 0)
6dde1f33
LP
938 return NULL;
939
4ad49000 940 return manager_get_unit_by_cgroup(m, cgroup);
6dde1f33 941}
4fbf50b3 942
4ad49000
LP
943int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
944 Unit *u;
945 int r;
4fbf50b3 946
4ad49000
LP
947 assert(m);
948 assert(cgroup);
4fbf50b3 949
4ad49000 950 u = manager_get_unit_by_cgroup(m, cgroup);
b56c28c3 951 if (u) {
06025d91
LP
952 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
953 if (r > 0) {
954 if (UNIT_VTABLE(u)->notify_cgroup_empty)
955 UNIT_VTABLE(u)->notify_cgroup_empty(u);
b56c28c3 956
06025d91
LP
957 unit_add_to_gc_queue(u);
958 }
b56c28c3 959 }
2633eb83 960
4ad49000 961 return 0;
4fbf50b3
LP
962}
963
4ad49000
LP
964static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
965 [CGROUP_AUTO] = "auto",
966 [CGROUP_CLOSED] = "closed",
967 [CGROUP_STRICT] = "strict",
968};
4fbf50b3 969
4ad49000 970DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);