]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/cgroup.c
gpt-auto-generator: rename root device node symlink to /dev/gpt-auto-root
[thirdparty/systemd.git] / src / core / cgroup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
4ad49000 6 Copyright 2013 Lennart Poettering
8e274523
LP
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
c6c18be3 22#include <fcntl.h>
8c6db833 23
9eb977db 24#include "path-util.h"
9444b1f2 25#include "special.h"
4ad49000
LP
26#include "cgroup-util.h"
27#include "cgroup.h"
8e274523 28
4ad49000
LP
29void cgroup_context_init(CGroupContext *c) {
30 assert(c);
31
32 /* Initialize everything to the kernel defaults, assuming the
33 * structure is preinitialized to 0 */
34
35 c->cpu_shares = 1024;
ddca82ac 36 c->memory_limit = (uint64_t) -1;
4ad49000
LP
37 c->blockio_weight = 1000;
38}
8e274523 39
4ad49000
LP
40void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
41 assert(c);
42 assert(a);
43
71fda00f 44 LIST_REMOVE(device_allow, c->device_allow, a);
4ad49000
LP
45 free(a->path);
46 free(a);
47}
48
49void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
50 assert(c);
51 assert(w);
52
71fda00f 53 LIST_REMOVE(device_weights, c->blockio_device_weights, w);
4ad49000
LP
54 free(w->path);
55 free(w);
56}
57
58void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
59 assert(c);
8e274523 60 assert(b);
8e274523 61
71fda00f 62 LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
4ad49000
LP
63 free(b->path);
64 free(b);
65}
66
67void cgroup_context_done(CGroupContext *c) {
68 assert(c);
69
70 while (c->blockio_device_weights)
71 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
72
73 while (c->blockio_device_bandwidths)
74 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
75
76 while (c->device_allow)
77 cgroup_context_free_device_allow(c, c->device_allow);
78}
79
80void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
81 CGroupBlockIODeviceBandwidth *b;
82 CGroupBlockIODeviceWeight *w;
83 CGroupDeviceAllow *a;
84
85 assert(c);
86 assert(f);
87
88 prefix = strempty(prefix);
89
90 fprintf(f,
91 "%sCPUAccounting=%s\n"
92 "%sBlockIOAccounting=%s\n"
93 "%sMemoryAccounting=%s\n"
94 "%sCPUShares=%lu\n"
112a7f46 95 "%sBlockIOWeight=%lu\n"
4ad49000 96 "%sMemoryLimit=%" PRIu64 "\n"
4ad49000
LP
97 "%sDevicePolicy=%s\n",
98 prefix, yes_no(c->cpu_accounting),
99 prefix, yes_no(c->blockio_accounting),
100 prefix, yes_no(c->memory_accounting),
101 prefix, c->cpu_shares,
102 prefix, c->blockio_weight,
103 prefix, c->memory_limit,
4ad49000
LP
104 prefix, cgroup_device_policy_to_string(c->device_policy));
105
106 LIST_FOREACH(device_allow, a, c->device_allow)
107 fprintf(f,
108 "%sDeviceAllow=%s %s%s%s\n",
109 prefix,
110 a->path,
111 a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
112
113 LIST_FOREACH(device_weights, w, c->blockio_device_weights)
114 fprintf(f,
8e7076ca 115 "%sBlockIODeviceWeight=%s %lu",
4ad49000
LP
116 prefix,
117 w->path,
118 w->weight);
119
120 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
121 char buf[FORMAT_BYTES_MAX];
122
123 fprintf(f,
124 "%s%s=%s %s\n",
125 prefix,
126 b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
127 b->path,
128 format_bytes(buf, sizeof(buf), b->bandwidth));
129 }
130}
131
/* Resolve a file system path to the block device number to use for
 * blkio settings.
 *
 * p:   path of a block device node, or of any file
 * dev: receives the device number on success
 *
 * If p is a block device node, its own device number is used. Otherwise
 * the device the file is stored on is used, and block_get_whole_disk()
 * is asked to replace it with the whole disk; its result is ignored, so
 * *dev keeps the original value if that lookup does not change it.
 *
 * Returns 0 on success, a negative errno-style value if stat() fails,
 * or -ENODEV if no backing block device can be determined. */
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;
        int r;

        assert(p);
        assert(dev);

        r = stat(p, &st);
        if (r < 0) {
                /* Capture errno before logging, in case the logging
                 * call modifies it */
                r = -errno;
                log_warning("Couldn't stat device %s: %m", p);
                return r;
        }

        if (S_ISBLK(st.st_mode))
                *dev = st.st_rdev;
        else if (major(st.st_dev) != 0) {
                /* If this is not a device node then find the block
                 * device this file is stored on */
                *dev = st.st_dev;

                /* If this is a partition, try to get the originating
                 * block device */
                block_get_whole_disk(*dev, dev);
        } else {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        return 0;
}
162
4ad49000
LP
163static int whitelist_device(const char *path, const char *node, const char *acc) {
164 char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
165 struct stat st;
8c6db833 166 int r;
8e274523 167
4ad49000
LP
168 assert(path);
169 assert(acc);
8e274523 170
4ad49000
LP
171 if (stat(node, &st) < 0) {
172 log_warning("Couldn't stat device %s", node);
173 return -errno;
174 }
175
176 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
177 log_warning("%s is not a device.", node);
178 return -ENODEV;
179 }
180
181 sprintf(buf,
182 "%c %u:%u %s",
183 S_ISCHR(st.st_mode) ? 'c' : 'b',
184 major(st.st_rdev), minor(st.st_rdev),
185 acc);
186
187 r = cg_set_attribute("devices", path, "devices.allow", buf);
188 if (r < 0)
189 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
190
191 return r;
8e274523
LP
192}
193
90060676
LP
194static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
195 _cleanup_fclose_ FILE *f = NULL;
196 char line[LINE_MAX];
197 bool good = false;
198 int r;
199
200 assert(path);
201 assert(acc);
202 assert(type == 'b' || type == 'c');
203
204 f = fopen("/proc/devices", "re");
205 if (!f) {
206 log_warning("Cannot open /proc/devices to resolve %s (%c): %m", name, type);
207 return -errno;
208 }
209
210 FOREACH_LINE(line, f, goto fail) {
211 char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
212 unsigned maj;
213
214 truncate_nl(line);
215
216 if (type == 'c' && streq(line, "Character devices:")) {
217 good = true;
218 continue;
219 }
220
221 if (type == 'b' && streq(line, "Block devices:")) {
222 good = true;
223 continue;
224 }
225
226 if (isempty(line)) {
227 good = false;
228 continue;
229 }
230
231 if (!good)
232 continue;
233
234 p = strstrip(line);
235
236 w = strpbrk(p, WHITESPACE);
237 if (!w)
238 continue;
239 *w = 0;
240
241 r = safe_atou(p, &maj);
242 if (r < 0)
243 continue;
244 if (maj <= 0)
245 continue;
246
247 w++;
248 w += strspn(w, WHITESPACE);
249 if (!streq(w, name))
250 continue;
251
252 sprintf(buf,
253 "%c %u:* %s",
254 type,
255 maj,
256 acc);
257
258 r = cg_set_attribute("devices", path, "devices.allow", buf);
259 if (r < 0)
260 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
261 }
262
263 return 0;
264
265fail:
266 log_warning("Failed to read /proc/devices: %m");
267 return -errno;
268}
269
/* Write the settings of a CGroupContext into the cgroup attribute files
 * of the given cgroup path.
 *
 * c:    the settings to apply
 * mask: which controllers to touch; nothing is done if this is 0
 * path: cgroup path to apply to; an empty path or "/" is treated as the
 *       root cgroup
 *
 * Failures to write individual attributes are logged and otherwise
 * ignored; nothing is reported back to the caller. */
void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path) {
        bool is_root;
        int r;

        assert(c);
        assert(path);

        if (mask == 0)
                return;

        /* Some cgroup attributes are not supported on the root cgroup,
         * hence silently ignore */
        is_root = isempty(path) || path_equal(path, "/");

        /* CPU controller: cpu.shares, skipped on the root cgroup */
        if ((mask & CGROUP_CPU) && !is_root) {
                char buf[DECIMAL_STR_MAX(unsigned long) + 1];

                sprintf(buf, "%lu\n", c->cpu_shares);
                r = cg_set_attribute("cpu", path, "cpu.shares", buf);
                if (r < 0)
                        log_warning("Failed to set cpu.shares on %s: %s", path, strerror(-r));
        }

        /* Block IO controller: overall weight, per-device weights and
         * per-device bandwidth limits */
        if (mask & CGROUP_BLKIO) {
                /* Sized for the longest of the three strings formatted below */
                char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
                              DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
                              DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
                CGroupBlockIODeviceWeight *w;
                CGroupBlockIODeviceBandwidth *b;

                /* The weights are skipped on the root cgroup */
                if (!is_root) {
                        sprintf(buf, "%lu\n", c->blockio_weight);
                        r = cg_set_attribute("blkio", path, "blkio.weight", buf);
                        if (r < 0)
                                log_warning("Failed to set blkio.weight on %s: %s", path, strerror(-r));

                        /* FIXME: no way to reset this list */
                        LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
                                dev_t dev;

                                /* Entries whose device cannot be resolved are skipped */
                                r = lookup_blkio_device(w->path, &dev);
                                if (r < 0)
                                        continue;

                                sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
                                r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
                                if (r < 0)
                                        log_error("Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
                        }
                }

                /* The bandwidth limits are applied to the root cgroup as well */
                /* FIXME: no way to reset this list */
                LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
                        const char *a;
                        dev_t dev;

                        /* Entries whose device cannot be resolved are skipped */
                        r = lookup_blkio_device(b->path, &dev);
                        if (r < 0)
                                continue;

                        a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";

                        sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
                        r = cg_set_attribute("blkio", path, a, buf);
                        if (r < 0)
                                log_error("Failed to set %s on %s: %s", a, path, strerror(-r));
                }
        }

        /* Memory controller: a memory_limit of (uint64_t) -1 is written
         * out as the literal string "-1" (presumably "no limit" --
         * confirm against the kernel documentation) */
        if (mask & CGROUP_MEMORY) {
                if (c->memory_limit != (uint64_t) -1) {
                        char buf[DECIMAL_STR_MAX(uint64_t) + 1];

                        sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
                        r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
                } else
                        r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");

                if (r < 0)
                        log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
        }

        /* Devices controller, skipped on the root cgroup */
        if ((mask & CGROUP_DEVICE) && !is_root) {
                CGroupDeviceAllow *a;

                /* Start from "deny everything" if there is an explicit
                 * allow list or a non-auto policy, otherwise from
                 * "allow everything" */
                if (c->device_allow || c->device_policy != CGROUP_AUTO)
                        r = cg_set_attribute("devices", path, "devices.deny", "a");
                else
                        r = cg_set_attribute("devices", path, "devices.allow", "a");
                if (r < 0)
                        log_warning("Failed to reset devices.list on %s: %s", path, strerror(-r));

                /* With the "closed" policy, or "auto" with an explicit
                 * allow list, a fixed set of device nodes is allowed
                 * in addition */
                if (c->device_policy == CGROUP_CLOSED ||
                    (c->device_policy == CGROUP_AUTO && c->device_allow)) {
                        /* Pairs of NUL-separated strings: device node, access string */
                        static const char auto_devices[] =
                                "/dev/null\0" "rw\0"
                                "/dev/zero\0" "rw\0"
                                "/dev/full\0" "rw\0"
                                "/dev/random\0" "rw\0"
                                "/dev/urandom\0" "rw\0";

                        const char *x, *y;

                        NULSTR_FOREACH_PAIR(x, y, auto_devices)
                                whitelist_device(path, x, y);
                }

                LIST_FOREACH(device_allow, a, c->device_allow) {
                        char acc[4];
                        unsigned k = 0;

                        /* Build the access string from the entry's flags */
                        if (a->r)
                                acc[k++] = 'r';
                        if (a->w)
                                acc[k++] = 'w';
                        if (a->m)
                                acc[k++] = 'm';

                        /* Entries without any access flag are skipped */
                        if (k == 0)
                                continue;

                        acc[k++] = 0;

                        /* "/dev/..." names a device node; "block-<name>"
                         * and "char-<name>" are looked up by name via
                         * whitelist_major() */
                        if (startswith(a->path, "/dev/"))
                                whitelist_device(path, a->path, acc);
                        else if (startswith(a->path, "block-"))
                                whitelist_major(path, a->path + 6, 'b', acc);
                        else if (startswith(a->path, "char-"))
                                whitelist_major(path, a->path + 5, 'c', acc);
                        else
                                log_debug("Ignoring device %s while writing cgroup attribute.", a->path);
                }
        }
}
404
4ad49000
LP
405CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
406 CGroupControllerMask mask = 0;
8e274523 407
4ad49000 408 /* Figure out which controllers we need */
8e274523 409
4ad49000
LP
410 if (c->cpu_accounting || c->cpu_shares != 1024)
411 mask |= CGROUP_CPUACCT | CGROUP_CPU;
ecedd90f 412
4ad49000
LP
413 if (c->blockio_accounting ||
414 c->blockio_weight != 1000 ||
415 c->blockio_device_weights ||
416 c->blockio_device_bandwidths)
417 mask |= CGROUP_BLKIO;
ecedd90f 418
4ad49000 419 if (c->memory_accounting ||
ddca82ac 420 c->memory_limit != (uint64_t) -1)
4ad49000 421 mask |= CGROUP_MEMORY;
8e274523 422
4ad49000
LP
423 if (c->device_allow || c->device_policy != CGROUP_AUTO)
424 mask |= CGROUP_DEVICE;
425
426 return mask;
8e274523
LP
427}
428
bc432dc7 429CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
4ad49000 430 CGroupContext *c;
8e274523 431
4ad49000
LP
432 c = unit_get_cgroup_context(u);
433 if (!c)
434 return 0;
8e274523 435
4ad49000 436 return cgroup_context_get_mask(c);
8e274523
LP
437}
438
bc432dc7 439CGroupControllerMask unit_get_members_mask(Unit *u) {
4ad49000 440 assert(u);
bc432dc7
LP
441
442 if (u->cgroup_members_mask_valid)
443 return u->cgroup_members_mask;
444
445 u->cgroup_members_mask = 0;
446
447 if (u->type == UNIT_SLICE) {
448 Unit *member;
449 Iterator i;
450
451 SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {
452
453 if (member == u)
454 continue;
455
d4fdc205 456 if (UNIT_DEREF(member->slice) != u)
bc432dc7
LP
457 continue;
458
459 u->cgroup_members_mask |=
460 unit_get_cgroup_mask(member) |
461 unit_get_members_mask(member);
462 }
463 }
464
465 u->cgroup_members_mask_valid = true;
6414b7c9 466 return u->cgroup_members_mask;
246aa6dd
LP
467}
468
bc432dc7
LP
469CGroupControllerMask unit_get_siblings_mask(Unit *u) {
470 CGroupControllerMask m;
471
4ad49000 472 assert(u);
246aa6dd 473
bc432dc7
LP
474 if (UNIT_ISSET(u->slice))
475 m = unit_get_members_mask(UNIT_DEREF(u->slice));
476 else
477 m = unit_get_cgroup_mask(u) | unit_get_members_mask(u);
4ad49000
LP
478
479 /* Sibling propagation is only relevant for weight-based
480 * controllers, so let's mask out everything else */
bc432dc7 481 return m & (CGROUP_CPU|CGROUP_BLKIO|CGROUP_CPUACCT);
246aa6dd
LP
482}
483
bc432dc7 484CGroupControllerMask unit_get_target_mask(Unit *u) {
6414b7c9
DS
485 CGroupControllerMask mask;
486
487 mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
488 mask &= u->manager->cgroup_supported;
489
490 return mask;
491}
492
493/* Recurse from a unit up through its containing slices, propagating
494 * mask bits upward. A unit is also member of itself. */
bc432dc7
LP
495void unit_update_cgroup_members_masks(Unit *u) {
496 CGroupControllerMask m;
497 bool more;
498
499 assert(u);
500
501 /* Calculate subtree mask */
502 m = unit_get_cgroup_mask(u) | unit_get_members_mask(u);
503
504 /* See if anything changed from the previous invocation. If
505 * not, we're done. */
506 if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask)
507 return;
508
509 more =
510 u->cgroup_subtree_mask_valid &&
511 ((m & ~u->cgroup_subtree_mask) != 0) &&
512 ((~m & u->cgroup_subtree_mask) == 0);
513
514 u->cgroup_subtree_mask = m;
515 u->cgroup_subtree_mask_valid = true;
516
6414b7c9
DS
517 if (UNIT_ISSET(u->slice)) {
518 Unit *s = UNIT_DEREF(u->slice);
bc432dc7
LP
519
520 if (more)
521 /* There's more set now than before. We
522 * propagate the new mask to the parent's mask
523 * (not caring if it actually was valid or
524 * not). */
525
526 s->cgroup_members_mask |= m;
527
528 else
529 /* There's less set now than before (or we
530 * don't know), we need to recalculate
531 * everything, so let's invalidate the
532 * parent's members mask */
533
534 s->cgroup_members_mask_valid = false;
535
536 /* And now make sure that this change also hits our
537 * grandparents */
538 unit_update_cgroup_members_masks(s);
6414b7c9
DS
539 }
540}
541
03b90d4b
LP
542static const char *migrate_callback(CGroupControllerMask mask, void *userdata) {
543 Unit *u = userdata;
544
545 assert(mask != 0);
546 assert(u);
547
548 while (u) {
549 if (u->cgroup_path &&
550 u->cgroup_realized &&
551 (u->cgroup_realized_mask & mask) == mask)
552 return u->cgroup_path;
553
554 u = UNIT_DEREF(u->slice);
555 }
556
557 return NULL;
558}
559
4ad49000 560static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
03b90d4b 561 _cleanup_free_ char *path = NULL;
bc432dc7 562 int r;
64747e2d 563
4ad49000 564 assert(u);
64747e2d 565
4ad49000
LP
566 path = unit_default_cgroup_path(u);
567 if (!path)
a94042fa 568 return log_oom();
64747e2d 569
0a1eb06d 570 r = hashmap_put(u->manager->cgroup_unit, path, u);
03b90d4b
LP
571 if (r < 0) {
572 log_error(r == -EEXIST ? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path, strerror(-r));
0a1eb06d 573 return r;
b58b8e11 574 }
03b90d4b 575 if (r > 0) {
b58b8e11 576 u->cgroup_path = path;
a94042fa 577 path = NULL;
b58b8e11
HH
578 }
579
03b90d4b
LP
580 /* First, create our own group */
581 r = cg_create_everywhere(u->manager->cgroup_supported, mask, u->cgroup_path);
582 if (r < 0) {
583 log_error("Failed to create cgroup %s: %s", u->cgroup_path, strerror(-r));
584 return r;
585 }
586
587 /* Keep track that this is now realized */
4ad49000 588 u->cgroup_realized = true;
bc432dc7 589 u->cgroup_realized_mask = mask;
4ad49000 590
03b90d4b
LP
591 /* Then, possibly move things over */
592 r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);
593 if (r < 0)
594 log_warning("Failed to migrate cgroup from to %s: %s", u->cgroup_path, strerror(-r));
595
64747e2d
LP
596 return 0;
597}
598
6414b7c9 599static bool unit_has_mask_realized(Unit *u, CGroupControllerMask mask) {
bc432dc7
LP
600 assert(u);
601
602 return u->cgroup_realized && u->cgroup_realized_mask == mask;
6414b7c9
DS
603}
604
605/* Check if necessary controllers and attributes for a unit are in place.
606 *
607 * If so, do nothing.
608 * If not, create paths, move processes over, and set attributes.
609 *
610 * Returns 0 on success and < 0 on failure. */
0a1eb06d 611static int unit_realize_cgroup_now(Unit *u) {
4ad49000 612 CGroupControllerMask mask;
6414b7c9 613 int r;
64747e2d 614
4ad49000 615 assert(u);
64747e2d 616
4ad49000 617 if (u->in_cgroup_queue) {
71fda00f 618 LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
619 u->in_cgroup_queue = false;
620 }
64747e2d 621
6414b7c9 622 mask = unit_get_target_mask(u);
64747e2d 623
6414b7c9 624 if (unit_has_mask_realized(u, mask))
0a1eb06d 625 return 0;
64747e2d 626
4ad49000 627 /* First, realize parents */
6414b7c9
DS
628 if (UNIT_ISSET(u->slice)) {
629 r = unit_realize_cgroup_now(UNIT_DEREF(u->slice));
630 if (r < 0)
631 return r;
632 }
4ad49000
LP
633
634 /* And then do the real work */
6414b7c9
DS
635 r = unit_create_cgroups(u, mask);
636 if (r < 0)
637 return r;
638
639 /* Finally, apply the necessary attributes. */
640 cgroup_context_apply(unit_get_cgroup_context(u), mask, u->cgroup_path);
641
642 return 0;
64747e2d
LP
643}
644
4ad49000 645static void unit_add_to_cgroup_queue(Unit *u) {
ecedd90f 646
4ad49000
LP
647 if (u->in_cgroup_queue)
648 return;
8e274523 649
71fda00f 650 LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
651 u->in_cgroup_queue = true;
652}
8c6db833 653
4ad49000
LP
654unsigned manager_dispatch_cgroup_queue(Manager *m) {
655 Unit *i;
656 unsigned n = 0;
6414b7c9 657 int r;
ecedd90f 658
4ad49000
LP
659 while ((i = m->cgroup_queue)) {
660 assert(i->in_cgroup_queue);
ecedd90f 661
6414b7c9
DS
662 r = unit_realize_cgroup_now(i);
663 if (r < 0)
664 log_warning("Failed to realize cgroups for queued unit %s: %s", i->id, strerror(-r));
0a1eb06d 665
4ad49000
LP
666 n++;
667 }
ecedd90f 668
4ad49000 669 return n;
8e274523
LP
670}
671
4ad49000
LP
672static void unit_queue_siblings(Unit *u) {
673 Unit *slice;
ca949c9d 674
4ad49000
LP
675 /* This adds the siblings of the specified unit and the
676 * siblings of all parent units to the cgroup queue. (But
677 * neither the specified unit itself nor the parents.) */
678
679 while ((slice = UNIT_DEREF(u->slice))) {
680 Iterator i;
681 Unit *m;
8f53a7b8 682
4ad49000
LP
683 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
684 if (m == u)
685 continue;
8e274523 686
6414b7c9
DS
687 /* Skip units that have a dependency on the slice
688 * but aren't actually in it. */
4ad49000 689 if (UNIT_DEREF(m->slice) != slice)
50159e6a 690 continue;
8e274523 691
6414b7c9
DS
692 /* No point in doing cgroup application for units
693 * without active processes. */
694 if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
695 continue;
696
697 /* If the unit doesn't need any new controllers
698 * and has current ones realized, it doesn't need
699 * any changes. */
700 if (unit_has_mask_realized(m, unit_get_target_mask(m)))
701 continue;
702
4ad49000 703 unit_add_to_cgroup_queue(m);
50159e6a
LP
704 }
705
4ad49000 706 u = slice;
8e274523 707 }
4ad49000
LP
708}
709
0a1eb06d 710int unit_realize_cgroup(Unit *u) {
4ad49000
LP
711 CGroupContext *c;
712
713 assert(u);
714
715 c = unit_get_cgroup_context(u);
716 if (!c)
0a1eb06d 717 return 0;
8e274523 718
4ad49000
LP
719 /* So, here's the deal: when realizing the cgroups for this
720 * unit, we need to first create all parents, but there's more
721 * actually: for the weight-based controllers we also need to
722 * make sure that all our siblings (i.e. units that are in the
73e231ab 723 * same slice as we are) have cgroups, too. Otherwise, things
4ad49000
LP
724 * would become very uneven as each of their processes would
725 * get as much resources as all our group together. This call
726 * will synchronously create the parent cgroups, but will
727 * defer work on the siblings to the next event loop
728 * iteration. */
ca949c9d 729
4ad49000
LP
730 /* Add all sibling slices to the cgroup queue. */
731 unit_queue_siblings(u);
732
6414b7c9 733 /* And realize this one now (and apply the values) */
bc432dc7 734 return unit_realize_cgroup_now(u);
8e274523
LP
735}
736
4ad49000 737void unit_destroy_cgroup(Unit *u) {
8e274523
LP
738 int r;
739
4ad49000 740 assert(u);
8e274523 741
4ad49000
LP
742 if (!u->cgroup_path)
743 return;
8e274523 744
13b84ec7 745 r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE));
4ad49000 746 if (r < 0)
376dd21d 747 log_debug("Failed to destroy cgroup %s: %s", u->cgroup_path, strerror(-r));
8e274523 748
0a1eb06d
LP
749 hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
750
4ad49000
LP
751 free(u->cgroup_path);
752 u->cgroup_path = NULL;
753 u->cgroup_realized = false;
bc432dc7 754 u->cgroup_realized_mask = 0;
0a1eb06d 755
8e274523
LP
756}
757
4ad49000
LP
758pid_t unit_search_main_pid(Unit *u) {
759 _cleanup_fclose_ FILE *f = NULL;
760 pid_t pid = 0, npid, mypid;
761
762 assert(u);
763
764 if (!u->cgroup_path)
765 return 0;
766
767 if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
768 return 0;
769
770 mypid = getpid();
771 while (cg_read_pid(f, &npid) > 0) {
772 pid_t ppid;
773
774 if (npid == pid)
775 continue;
8e274523 776
4ad49000
LP
777 /* Ignore processes that aren't our kids */
778 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
779 continue;
8e274523 780
4ad49000
LP
781 if (pid != 0) {
782 /* Dang, there's more than one daemonized PID
783 in this group, so we don't know what process
784 is the main process. */
785 pid = 0;
786 break;
787 }
8e274523 788
4ad49000 789 pid = npid;
8e274523
LP
790 }
791
4ad49000 792 return pid;
8e274523
LP
793}
794
8e274523 795int manager_setup_cgroup(Manager *m) {
9444b1f2 796 _cleanup_free_ char *path = NULL;
15c60e99 797 char *e;
8e274523 798 int r;
8e274523
LP
799
800 assert(m);
801
e5a53dc7 802 /* 0. Be nice to Ingo Molnar #628004 */
0c85a4f3 803 if (path_is_mount_point("/sys/fs/cgroup/systemd", false) <= 0) {
e5a53dc7
LP
804 log_warning("No control group support available, not creating root group.");
805 return 0;
806 }
807
35d2e7ec 808 /* 1. Determine hierarchy */
9444b1f2
LP
809 free(m->cgroup_root);
810 m->cgroup_root = NULL;
811
812 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
9156e799 813 if (r < 0) {
12235040 814 log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
a32360f1 815 return r;
12235040 816 }
8e274523 817
15c60e99
LP
818 /* LEGACY: Already in /system.slice? If so, let's cut this
819 * off. This is to support live upgrades from older systemd
820 * versions where PID 1 was moved there. */
9444b1f2
LP
821 if (m->running_as == SYSTEMD_SYSTEM) {
822 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
15c60e99
LP
823 if (!e)
824 e = endswith(m->cgroup_root, "/system");
9444b1f2
LP
825 if (e)
826 *e = 0;
0baf24dd 827 }
7ccfb64a 828
9444b1f2
LP
829 /* And make sure to store away the root value without trailing
830 * slash, even for the root dir, so that we can easily prepend
831 * it everywhere. */
832 if (streq(m->cgroup_root, "/"))
833 m->cgroup_root[0] = 0;
8e274523 834
35d2e7ec 835 /* 2. Show data */
9444b1f2 836 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
3474ae3c 837 if (r < 0) {
12235040 838 log_error("Cannot find cgroup mount point: %s", strerror(-r));
a32360f1 839 return r;
12235040 840 }
8e274523 841
c6c18be3
LP
842 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
843
35d2e7ec 844 /* 3. Install agent */
a32360f1
LP
845 if (m->running_as == SYSTEMD_SYSTEM) {
846 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
847 if (r < 0)
848 log_warning("Failed to install release agent, ignoring: %s", strerror(-r));
849 else if (r > 0)
850 log_debug("Installed release agent.");
851 else
852 log_debug("Release agent already installed.");
853 }
8e274523 854
15c60e99
LP
855 /* 4. Make sure we are in the root cgroup */
856 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
9156e799 857 if (r < 0) {
8e274523 858 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r));
a32360f1 859 return r;
c6c18be3
LP
860 }
861
35d2e7ec 862 /* 5. And pin it, so that it cannot be unmounted */
c6c18be3
LP
863 if (m->pin_cgroupfs_fd >= 0)
864 close_nointr_nofail(m->pin_cgroupfs_fd);
865
9156e799
LP
866 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
867 if (r < 0) {
12235040 868 log_error("Failed to open pin file: %m");
a32360f1 869 return -errno;
c6c18be3
LP
870 }
871
4ad49000
LP
872 /* 6. Figure out which controllers are supported */
873 m->cgroup_supported = cg_mask_supported();
9156e799 874
e58cec11
LP
875 /* 7. Always enable hierarchial support if it exists... */
876 cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
877
a32360f1 878 return 0;
8e274523
LP
879}
880
c6c18be3 881void manager_shutdown_cgroup(Manager *m, bool delete) {
8e274523
LP
882 assert(m);
883
9444b1f2
LP
884 /* We can't really delete the group, since we are in it. But
885 * let's trim it. */
886 if (delete && m->cgroup_root)
887 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
8e274523 888
c6c18be3
LP
889 if (m->pin_cgroupfs_fd >= 0) {
890 close_nointr_nofail(m->pin_cgroupfs_fd);
891 m->pin_cgroupfs_fd = -1;
892 }
893
9444b1f2
LP
894 free(m->cgroup_root);
895 m->cgroup_root = NULL;
8e274523
LP
896}
897
4ad49000 898Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
acb14d31 899 char *p;
4ad49000 900 Unit *u;
acb14d31
LP
901
902 assert(m);
903 assert(cgroup);
acb14d31 904
4ad49000
LP
905 u = hashmap_get(m->cgroup_unit, cgroup);
906 if (u)
907 return u;
acb14d31 908
8e70580b 909 p = strdupa(cgroup);
acb14d31
LP
910 for (;;) {
911 char *e;
912
913 e = strrchr(p, '/');
4ad49000
LP
914 if (e == p || !e)
915 return NULL;
acb14d31
LP
916
917 *e = 0;
918
4ad49000
LP
919 u = hashmap_get(m->cgroup_unit, p);
920 if (u)
921 return u;
acb14d31
LP
922 }
923}
924
4ad49000
LP
925Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
926 _cleanup_free_ char *cgroup = NULL;
acb14d31 927 int r;
8e274523 928
8c47c732
LP
929 assert(m);
930
931 if (pid <= 1)
932 return NULL;
933
4ad49000
LP
934 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
935 if (r < 0)
6dde1f33
LP
936 return NULL;
937
4ad49000 938 return manager_get_unit_by_cgroup(m, cgroup);
6dde1f33 939}
4fbf50b3 940
4ad49000
LP
941int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
942 Unit *u;
943 int r;
4fbf50b3 944
4ad49000
LP
945 assert(m);
946 assert(cgroup);
4fbf50b3 947
4ad49000 948 u = manager_get_unit_by_cgroup(m, cgroup);
b56c28c3 949 if (u) {
06025d91
LP
950 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
951 if (r > 0) {
952 if (UNIT_VTABLE(u)->notify_cgroup_empty)
953 UNIT_VTABLE(u)->notify_cgroup_empty(u);
b56c28c3 954
06025d91
LP
955 unit_add_to_gc_queue(u);
956 }
b56c28c3 957 }
2633eb83 958
4ad49000 959 return 0;
4fbf50b3
LP
960}
961
4ad49000
LP
/* String names of the CGroupDevicePolicy values, indexed by enum value. */
static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
        [CGROUP_AUTO] = "auto",
        [CGROUP_CLOSED] = "closed",
        [CGROUP_STRICT] = "strict",
};

/* Presumably expands to the string conversion helpers for the table
 * above; cgroup_device_policy_to_string() is called earlier in this
 * file -- confirm against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);