]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/cgroup.c
core: make sure we always write changed cgroup attributes to the cgroupfs
[thirdparty/systemd.git] / src / core / cgroup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
4ad49000 6 Copyright 2013 Lennart Poettering
8e274523
LP
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
c6c18be3 22#include <fcntl.h>
e41969e3 23#include <fnmatch.h>
8c6db833 24
9eb977db 25#include "path-util.h"
9444b1f2 26#include "special.h"
4ad49000
LP
27#include "cgroup-util.h"
28#include "cgroup.h"
8e274523 29
4ad49000
LP
30void cgroup_context_init(CGroupContext *c) {
31 assert(c);
32
33 /* Initialize everything to the kernel defaults, assuming the
34 * structure is preinitialized to 0 */
35
36 c->cpu_shares = 1024;
ddca82ac 37 c->memory_limit = (uint64_t) -1;
4ad49000
LP
38 c->blockio_weight = 1000;
39}
8e274523 40
4ad49000
LP
41void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
42 assert(c);
43 assert(a);
44
71fda00f 45 LIST_REMOVE(device_allow, c->device_allow, a);
4ad49000
LP
46 free(a->path);
47 free(a);
48}
49
50void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
51 assert(c);
52 assert(w);
53
71fda00f 54 LIST_REMOVE(device_weights, c->blockio_device_weights, w);
4ad49000
LP
55 free(w->path);
56 free(w);
57}
58
59void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
60 assert(c);
8e274523 61 assert(b);
8e274523 62
71fda00f 63 LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
4ad49000
LP
64 free(b->path);
65 free(b);
66}
67
68void cgroup_context_done(CGroupContext *c) {
69 assert(c);
70
71 while (c->blockio_device_weights)
72 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
73
74 while (c->blockio_device_bandwidths)
75 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
76
77 while (c->device_allow)
78 cgroup_context_free_device_allow(c, c->device_allow);
79}
80
81void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
82 CGroupBlockIODeviceBandwidth *b;
83 CGroupBlockIODeviceWeight *w;
84 CGroupDeviceAllow *a;
85
86 assert(c);
87 assert(f);
88
89 prefix = strempty(prefix);
90
91 fprintf(f,
92 "%sCPUAccounting=%s\n"
93 "%sBlockIOAccounting=%s\n"
94 "%sMemoryAccounting=%s\n"
95 "%sCPUShares=%lu\n"
112a7f46 96 "%sBlockIOWeight=%lu\n"
4ad49000 97 "%sMemoryLimit=%" PRIu64 "\n"
4ad49000
LP
98 "%sDevicePolicy=%s\n",
99 prefix, yes_no(c->cpu_accounting),
100 prefix, yes_no(c->blockio_accounting),
101 prefix, yes_no(c->memory_accounting),
102 prefix, c->cpu_shares,
103 prefix, c->blockio_weight,
104 prefix, c->memory_limit,
4ad49000
LP
105 prefix, cgroup_device_policy_to_string(c->device_policy));
106
107 LIST_FOREACH(device_allow, a, c->device_allow)
108 fprintf(f,
109 "%sDeviceAllow=%s %s%s%s\n",
110 prefix,
111 a->path,
112 a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
113
114 LIST_FOREACH(device_weights, w, c->blockio_device_weights)
115 fprintf(f,
8e7076ca 116 "%sBlockIODeviceWeight=%s %lu",
4ad49000
LP
117 prefix,
118 w->path,
119 w->weight);
120
121 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
122 char buf[FORMAT_BYTES_MAX];
123
124 fprintf(f,
125 "%s%s=%s %s\n",
126 prefix,
127 b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
128 b->path,
129 format_bytes(buf, sizeof(buf), b->bandwidth));
130 }
131}
132
/* Determines the block device number to use for path p and stores it in *dev.
 *
 * If p is a block device node its own device number is used. Otherwise the
 * device number of the file system p is stored on is used, after trying to
 * map it to the whole-disk device via block_get_whole_disk().
 *
 * Returns 0 on success, -errno if stat() fails and -ENODEV if no block device
 * could be determined. */
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;

        assert(p);
        assert(dev);

        if (stat(p, &st) < 0) {
                log_warning("Couldn't stat device %s: %m", p);
                return -errno;
        }

        /* A block device node: use the device it refers to. */
        if (S_ISBLK(st.st_mode)) {
                *dev = st.st_rdev;
                return 0;
        }

        /* Not a device node and no backing device with a non-zero major. */
        if (major(st.st_dev) == 0) {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        /* Use the block device this file is stored on ... */
        *dev = st.st_dev;

        /* ... and, if that is a partition, try to get the originating block
         * device. The result of this call is deliberately not checked. */
        block_get_whole_disk(*dev, dev);

        return 0;
}
163
4ad49000
LP
164static int whitelist_device(const char *path, const char *node, const char *acc) {
165 char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
166 struct stat st;
8c6db833 167 int r;
8e274523 168
4ad49000
LP
169 assert(path);
170 assert(acc);
8e274523 171
4ad49000
LP
172 if (stat(node, &st) < 0) {
173 log_warning("Couldn't stat device %s", node);
174 return -errno;
175 }
176
177 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
178 log_warning("%s is not a device.", node);
179 return -ENODEV;
180 }
181
182 sprintf(buf,
183 "%c %u:%u %s",
184 S_ISCHR(st.st_mode) ? 'c' : 'b',
185 major(st.st_rdev), minor(st.st_rdev),
186 acc);
187
188 r = cg_set_attribute("devices", path, "devices.allow", buf);
189 if (r < 0)
190 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
191
192 return r;
8e274523
LP
193}
194
90060676
LP
195static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
196 _cleanup_fclose_ FILE *f = NULL;
197 char line[LINE_MAX];
198 bool good = false;
199 int r;
200
201 assert(path);
202 assert(acc);
203 assert(type == 'b' || type == 'c');
204
205 f = fopen("/proc/devices", "re");
206 if (!f) {
207 log_warning("Cannot open /proc/devices to resolve %s (%c): %m", name, type);
208 return -errno;
209 }
210
211 FOREACH_LINE(line, f, goto fail) {
212 char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
213 unsigned maj;
214
215 truncate_nl(line);
216
217 if (type == 'c' && streq(line, "Character devices:")) {
218 good = true;
219 continue;
220 }
221
222 if (type == 'b' && streq(line, "Block devices:")) {
223 good = true;
224 continue;
225 }
226
227 if (isempty(line)) {
228 good = false;
229 continue;
230 }
231
232 if (!good)
233 continue;
234
235 p = strstrip(line);
236
237 w = strpbrk(p, WHITESPACE);
238 if (!w)
239 continue;
240 *w = 0;
241
242 r = safe_atou(p, &maj);
243 if (r < 0)
244 continue;
245 if (maj <= 0)
246 continue;
247
248 w++;
249 w += strspn(w, WHITESPACE);
e41969e3
LP
250
251 if (fnmatch(name, w, 0) != 0)
90060676
LP
252 continue;
253
254 sprintf(buf,
255 "%c %u:* %s",
256 type,
257 maj,
258 acc);
259
260 r = cg_set_attribute("devices", path, "devices.allow", buf);
261 if (r < 0)
262 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
263 }
264
265 return 0;
266
267fail:
268 log_warning("Failed to read /proc/devices: %m");
269 return -errno;
270}
271
4ad49000 272void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path) {
01efdf13 273 bool is_root;
4ad49000
LP
274 int r;
275
276 assert(c);
277 assert(path);
8e274523 278
4ad49000
LP
279 if (mask == 0)
280 return;
8e274523 281
01efdf13
LP
282 /* Some cgroup attributes are not support on the root cgroup,
283 * hence silently ignore */
284 is_root = isempty(path) || path_equal(path, "/");
285
286 if ((mask & CGROUP_CPU) && !is_root) {
4ad49000 287 char buf[DECIMAL_STR_MAX(unsigned long) + 1];
8e274523 288
4ad49000
LP
289 sprintf(buf, "%lu\n", c->cpu_shares);
290 r = cg_set_attribute("cpu", path, "cpu.shares", buf);
291 if (r < 0)
292 log_warning("Failed to set cpu.shares on %s: %s", path, strerror(-r));
293 }
294
295 if (mask & CGROUP_BLKIO) {
296 char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
297 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
298 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
299 CGroupBlockIODeviceWeight *w;
300 CGroupBlockIODeviceBandwidth *b;
301
01efdf13
LP
302 if (!is_root) {
303 sprintf(buf, "%lu\n", c->blockio_weight);
304 r = cg_set_attribute("blkio", path, "blkio.weight", buf);
305 if (r < 0)
306 log_warning("Failed to set blkio.weight on %s: %s", path, strerror(-r));
4ad49000 307
01efdf13
LP
308 /* FIXME: no way to reset this list */
309 LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
310 dev_t dev;
4ad49000 311
01efdf13
LP
312 r = lookup_blkio_device(w->path, &dev);
313 if (r < 0)
314 continue;
8e274523 315
01efdf13
LP
316 sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
317 r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
318 if (r < 0)
319 log_error("Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
320 }
4ad49000
LP
321 }
322
323 /* FIXME: no way to reset this list */
324 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
325 const char *a;
326 dev_t dev;
327
328 r = lookup_blkio_device(b->path, &dev);
329 if (r < 0)
330 continue;
331
332 a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
333
334 sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
335 r = cg_set_attribute("blkio", path, a, buf);
336 if (r < 0)
337 log_error("Failed to set %s on %s: %s", a, path, strerror(-r));
d686d8a9 338 }
8e274523
LP
339 }
340
4ad49000 341 if (mask & CGROUP_MEMORY) {
6a94f2e9 342 if (c->memory_limit != (uint64_t) -1) {
e58cec11
LP
343 char buf[DECIMAL_STR_MAX(uint64_t) + 1];
344
6a94f2e9
G
345 sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
346 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
347 } else
348 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
8e274523 349
4ad49000
LP
350 if (r < 0)
351 log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
4ad49000 352 }
8e274523 353
01efdf13 354 if ((mask & CGROUP_DEVICE) && !is_root) {
4ad49000 355 CGroupDeviceAllow *a;
8e274523 356
4ad49000
LP
357 if (c->device_allow || c->device_policy != CGROUP_AUTO)
358 r = cg_set_attribute("devices", path, "devices.deny", "a");
359 else
360 r = cg_set_attribute("devices", path, "devices.allow", "a");
361 if (r < 0)
01efdf13 362 log_warning("Failed to reset devices.list on %s: %s", path, strerror(-r));
fb385181 363
4ad49000
LP
364 if (c->device_policy == CGROUP_CLOSED ||
365 (c->device_policy == CGROUP_AUTO && c->device_allow)) {
366 static const char auto_devices[] =
7d711efb
LP
367 "/dev/null\0" "rwm\0"
368 "/dev/zero\0" "rwm\0"
369 "/dev/full\0" "rwm\0"
370 "/dev/random\0" "rwm\0"
371 "/dev/urandom\0" "rwm\0"
372 "/dev/tty\0" "rwm\0"
373 "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
4ad49000
LP
374
375 const char *x, *y;
376
377 NULSTR_FOREACH_PAIR(x, y, auto_devices)
378 whitelist_device(path, x, y);
7d711efb
LP
379
380 whitelist_major(path, "pts", 'c', "rw");
381 whitelist_major(path, "kdbus", 'c', "rw");
382 whitelist_major(path, "kdbus/*", 'c', "rw");
4ad49000
LP
383 }
384
385 LIST_FOREACH(device_allow, a, c->device_allow) {
386 char acc[4];
387 unsigned k = 0;
388
389 if (a->r)
390 acc[k++] = 'r';
391 if (a->w)
392 acc[k++] = 'w';
393 if (a->m)
394 acc[k++] = 'm';
fb385181 395
4ad49000
LP
396 if (k == 0)
397 continue;
fb385181 398
4ad49000 399 acc[k++] = 0;
90060676
LP
400
401 if (startswith(a->path, "/dev/"))
402 whitelist_device(path, a->path, acc);
403 else if (startswith(a->path, "block-"))
404 whitelist_major(path, a->path + 6, 'b', acc);
405 else if (startswith(a->path, "char-"))
406 whitelist_major(path, a->path + 5, 'c', acc);
407 else
408 log_debug("Ignoring device %s while writing cgroup attribute.", a->path);
4ad49000
LP
409 }
410 }
fb385181
LP
411}
412
4ad49000
LP
413CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
414 CGroupControllerMask mask = 0;
8e274523 415
4ad49000 416 /* Figure out which controllers we need */
8e274523 417
4ad49000
LP
418 if (c->cpu_accounting || c->cpu_shares != 1024)
419 mask |= CGROUP_CPUACCT | CGROUP_CPU;
ecedd90f 420
4ad49000
LP
421 if (c->blockio_accounting ||
422 c->blockio_weight != 1000 ||
423 c->blockio_device_weights ||
424 c->blockio_device_bandwidths)
425 mask |= CGROUP_BLKIO;
ecedd90f 426
4ad49000 427 if (c->memory_accounting ||
ddca82ac 428 c->memory_limit != (uint64_t) -1)
4ad49000 429 mask |= CGROUP_MEMORY;
8e274523 430
4ad49000
LP
431 if (c->device_allow || c->device_policy != CGROUP_AUTO)
432 mask |= CGROUP_DEVICE;
433
434 return mask;
8e274523
LP
435}
436
bc432dc7 437CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
4ad49000 438 CGroupContext *c;
8e274523 439
4ad49000
LP
440 c = unit_get_cgroup_context(u);
441 if (!c)
442 return 0;
8e274523 443
4ad49000 444 return cgroup_context_get_mask(c);
8e274523
LP
445}
446
bc432dc7 447CGroupControllerMask unit_get_members_mask(Unit *u) {
4ad49000 448 assert(u);
bc432dc7
LP
449
450 if (u->cgroup_members_mask_valid)
451 return u->cgroup_members_mask;
452
453 u->cgroup_members_mask = 0;
454
455 if (u->type == UNIT_SLICE) {
456 Unit *member;
457 Iterator i;
458
459 SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {
460
461 if (member == u)
462 continue;
463
d4fdc205 464 if (UNIT_DEREF(member->slice) != u)
bc432dc7
LP
465 continue;
466
467 u->cgroup_members_mask |=
468 unit_get_cgroup_mask(member) |
469 unit_get_members_mask(member);
470 }
471 }
472
473 u->cgroup_members_mask_valid = true;
6414b7c9 474 return u->cgroup_members_mask;
246aa6dd
LP
475}
476
bc432dc7
LP
477CGroupControllerMask unit_get_siblings_mask(Unit *u) {
478 CGroupControllerMask m;
479
4ad49000 480 assert(u);
246aa6dd 481
bc432dc7
LP
482 if (UNIT_ISSET(u->slice))
483 m = unit_get_members_mask(UNIT_DEREF(u->slice));
484 else
485 m = unit_get_cgroup_mask(u) | unit_get_members_mask(u);
4ad49000
LP
486
487 /* Sibling propagation is only relevant for weight-based
488 * controllers, so let's mask out everything else */
bc432dc7 489 return m & (CGROUP_CPU|CGROUP_BLKIO|CGROUP_CPUACCT);
246aa6dd
LP
490}
491
bc432dc7 492CGroupControllerMask unit_get_target_mask(Unit *u) {
6414b7c9
DS
493 CGroupControllerMask mask;
494
495 mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
496 mask &= u->manager->cgroup_supported;
497
498 return mask;
499}
500
501/* Recurse from a unit up through its containing slices, propagating
502 * mask bits upward. A unit is also member of itself. */
bc432dc7
LP
503void unit_update_cgroup_members_masks(Unit *u) {
504 CGroupControllerMask m;
505 bool more;
506
507 assert(u);
508
509 /* Calculate subtree mask */
510 m = unit_get_cgroup_mask(u) | unit_get_members_mask(u);
511
512 /* See if anything changed from the previous invocation. If
513 * not, we're done. */
514 if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask)
515 return;
516
517 more =
518 u->cgroup_subtree_mask_valid &&
519 ((m & ~u->cgroup_subtree_mask) != 0) &&
520 ((~m & u->cgroup_subtree_mask) == 0);
521
522 u->cgroup_subtree_mask = m;
523 u->cgroup_subtree_mask_valid = true;
524
6414b7c9
DS
525 if (UNIT_ISSET(u->slice)) {
526 Unit *s = UNIT_DEREF(u->slice);
bc432dc7
LP
527
528 if (more)
529 /* There's more set now than before. We
530 * propagate the new mask to the parent's mask
531 * (not caring if it actually was valid or
532 * not). */
533
534 s->cgroup_members_mask |= m;
535
536 else
537 /* There's less set now than before (or we
538 * don't know), we need to recalculate
539 * everything, so let's invalidate the
540 * parent's members mask */
541
542 s->cgroup_members_mask_valid = false;
543
544 /* And now make sure that this change also hits our
545 * grandparents */
546 unit_update_cgroup_members_masks(s);
6414b7c9
DS
547 }
548}
549
03b90d4b
LP
550static const char *migrate_callback(CGroupControllerMask mask, void *userdata) {
551 Unit *u = userdata;
552
553 assert(mask != 0);
554 assert(u);
555
556 while (u) {
557 if (u->cgroup_path &&
558 u->cgroup_realized &&
559 (u->cgroup_realized_mask & mask) == mask)
560 return u->cgroup_path;
561
562 u = UNIT_DEREF(u->slice);
563 }
564
565 return NULL;
566}
567
4ad49000 568static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
03b90d4b 569 _cleanup_free_ char *path = NULL;
bc432dc7 570 int r;
64747e2d 571
4ad49000 572 assert(u);
64747e2d 573
4ad49000
LP
574 path = unit_default_cgroup_path(u);
575 if (!path)
a94042fa 576 return log_oom();
64747e2d 577
0a1eb06d 578 r = hashmap_put(u->manager->cgroup_unit, path, u);
03b90d4b
LP
579 if (r < 0) {
580 log_error(r == -EEXIST ? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path, strerror(-r));
0a1eb06d 581 return r;
b58b8e11 582 }
03b90d4b 583 if (r > 0) {
b58b8e11 584 u->cgroup_path = path;
a94042fa 585 path = NULL;
b58b8e11
HH
586 }
587
03b90d4b
LP
588 /* First, create our own group */
589 r = cg_create_everywhere(u->manager->cgroup_supported, mask, u->cgroup_path);
590 if (r < 0) {
591 log_error("Failed to create cgroup %s: %s", u->cgroup_path, strerror(-r));
592 return r;
593 }
594
595 /* Keep track that this is now realized */
4ad49000 596 u->cgroup_realized = true;
bc432dc7 597 u->cgroup_realized_mask = mask;
4ad49000 598
03b90d4b
LP
599 /* Then, possibly move things over */
600 r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);
601 if (r < 0)
602 log_warning("Failed to migrate cgroup from to %s: %s", u->cgroup_path, strerror(-r));
603
64747e2d
LP
604 return 0;
605}
606
6414b7c9 607static bool unit_has_mask_realized(Unit *u, CGroupControllerMask mask) {
bc432dc7
LP
608 assert(u);
609
610 return u->cgroup_realized && u->cgroup_realized_mask == mask;
6414b7c9
DS
611}
612
613/* Check if necessary controllers and attributes for a unit are in place.
614 *
615 * If so, do nothing.
616 * If not, create paths, move processes over, and set attributes.
617 *
618 * Returns 0 on success and < 0 on failure. */
0a1eb06d 619static int unit_realize_cgroup_now(Unit *u) {
4ad49000 620 CGroupControllerMask mask;
6414b7c9 621 int r;
64747e2d 622
4ad49000 623 assert(u);
64747e2d 624
4ad49000 625 if (u->in_cgroup_queue) {
71fda00f 626 LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
627 u->in_cgroup_queue = false;
628 }
64747e2d 629
6414b7c9 630 mask = unit_get_target_mask(u);
64747e2d 631
6414b7c9 632 if (unit_has_mask_realized(u, mask))
0a1eb06d 633 return 0;
64747e2d 634
4ad49000 635 /* First, realize parents */
6414b7c9
DS
636 if (UNIT_ISSET(u->slice)) {
637 r = unit_realize_cgroup_now(UNIT_DEREF(u->slice));
638 if (r < 0)
639 return r;
640 }
4ad49000
LP
641
642 /* And then do the real work */
6414b7c9
DS
643 r = unit_create_cgroups(u, mask);
644 if (r < 0)
645 return r;
646
647 /* Finally, apply the necessary attributes. */
648 cgroup_context_apply(unit_get_cgroup_context(u), mask, u->cgroup_path);
649
650 return 0;
64747e2d
LP
651}
652
4ad49000 653static void unit_add_to_cgroup_queue(Unit *u) {
ecedd90f 654
4ad49000
LP
655 if (u->in_cgroup_queue)
656 return;
8e274523 657
71fda00f 658 LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
659 u->in_cgroup_queue = true;
660}
8c6db833 661
4ad49000
LP
662unsigned manager_dispatch_cgroup_queue(Manager *m) {
663 Unit *i;
664 unsigned n = 0;
6414b7c9 665 int r;
ecedd90f 666
4ad49000
LP
667 while ((i = m->cgroup_queue)) {
668 assert(i->in_cgroup_queue);
ecedd90f 669
6414b7c9
DS
670 r = unit_realize_cgroup_now(i);
671 if (r < 0)
672 log_warning("Failed to realize cgroups for queued unit %s: %s", i->id, strerror(-r));
0a1eb06d 673
4ad49000
LP
674 n++;
675 }
ecedd90f 676
4ad49000 677 return n;
8e274523
LP
678}
679
4ad49000
LP
680static void unit_queue_siblings(Unit *u) {
681 Unit *slice;
ca949c9d 682
4ad49000
LP
683 /* This adds the siblings of the specified unit and the
684 * siblings of all parent units to the cgroup queue. (But
685 * neither the specified unit itself nor the parents.) */
686
687 while ((slice = UNIT_DEREF(u->slice))) {
688 Iterator i;
689 Unit *m;
8f53a7b8 690
4ad49000
LP
691 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
692 if (m == u)
693 continue;
8e274523 694
6414b7c9
DS
695 /* Skip units that have a dependency on the slice
696 * but aren't actually in it. */
4ad49000 697 if (UNIT_DEREF(m->slice) != slice)
50159e6a 698 continue;
8e274523 699
6414b7c9
DS
700 /* No point in doing cgroup application for units
701 * without active processes. */
702 if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
703 continue;
704
705 /* If the unit doesn't need any new controllers
706 * and has current ones realized, it doesn't need
707 * any changes. */
708 if (unit_has_mask_realized(m, unit_get_target_mask(m)))
709 continue;
710
4ad49000 711 unit_add_to_cgroup_queue(m);
50159e6a
LP
712 }
713
4ad49000 714 u = slice;
8e274523 715 }
4ad49000
LP
716}
717
0a1eb06d 718int unit_realize_cgroup(Unit *u) {
4ad49000
LP
719 CGroupContext *c;
720
721 assert(u);
722
723 c = unit_get_cgroup_context(u);
724 if (!c)
0a1eb06d 725 return 0;
8e274523 726
4ad49000
LP
727 /* So, here's the deal: when realizing the cgroups for this
728 * unit, we need to first create all parents, but there's more
729 * actually: for the weight-based controllers we also need to
730 * make sure that all our siblings (i.e. units that are in the
73e231ab 731 * same slice as we are) have cgroups, too. Otherwise, things
4ad49000
LP
732 * would become very uneven as each of their processes would
733 * get as much resources as all our group together. This call
734 * will synchronously create the parent cgroups, but will
735 * defer work on the siblings to the next event loop
736 * iteration. */
ca949c9d 737
4ad49000
LP
738 /* Add all sibling slices to the cgroup queue. */
739 unit_queue_siblings(u);
740
6414b7c9 741 /* And realize this one now (and apply the values) */
bc432dc7 742 return unit_realize_cgroup_now(u);
8e274523
LP
743}
744
4ad49000 745void unit_destroy_cgroup(Unit *u) {
8e274523
LP
746 int r;
747
4ad49000 748 assert(u);
8e274523 749
4ad49000
LP
750 if (!u->cgroup_path)
751 return;
8e274523 752
13b84ec7 753 r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE));
4ad49000 754 if (r < 0)
376dd21d 755 log_debug("Failed to destroy cgroup %s: %s", u->cgroup_path, strerror(-r));
8e274523 756
0a1eb06d
LP
757 hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
758
4ad49000
LP
759 free(u->cgroup_path);
760 u->cgroup_path = NULL;
761 u->cgroup_realized = false;
bc432dc7 762 u->cgroup_realized_mask = 0;
0a1eb06d 763
8e274523
LP
764}
765
4ad49000
LP
766pid_t unit_search_main_pid(Unit *u) {
767 _cleanup_fclose_ FILE *f = NULL;
768 pid_t pid = 0, npid, mypid;
769
770 assert(u);
771
772 if (!u->cgroup_path)
773 return 0;
774
775 if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
776 return 0;
777
778 mypid = getpid();
779 while (cg_read_pid(f, &npid) > 0) {
780 pid_t ppid;
781
782 if (npid == pid)
783 continue;
8e274523 784
4ad49000
LP
785 /* Ignore processes that aren't our kids */
786 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
787 continue;
8e274523 788
4ad49000
LP
789 if (pid != 0) {
790 /* Dang, there's more than one daemonized PID
791 in this group, so we don't know what process
792 is the main process. */
793 pid = 0;
794 break;
795 }
8e274523 796
4ad49000 797 pid = npid;
8e274523
LP
798 }
799
4ad49000 800 return pid;
8e274523
LP
801}
802
8e274523 803int manager_setup_cgroup(Manager *m) {
9444b1f2 804 _cleanup_free_ char *path = NULL;
15c60e99 805 char *e;
8e274523 806 int r;
8e274523
LP
807
808 assert(m);
809
e5a53dc7 810 /* 0. Be nice to Ingo Molnar #628004 */
0c85a4f3 811 if (path_is_mount_point("/sys/fs/cgroup/systemd", false) <= 0) {
e5a53dc7
LP
812 log_warning("No control group support available, not creating root group.");
813 return 0;
814 }
815
35d2e7ec 816 /* 1. Determine hierarchy */
9444b1f2
LP
817 free(m->cgroup_root);
818 m->cgroup_root = NULL;
819
820 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
9156e799 821 if (r < 0) {
12235040 822 log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
a32360f1 823 return r;
12235040 824 }
8e274523 825
15c60e99
LP
826 /* LEGACY: Already in /system.slice? If so, let's cut this
827 * off. This is to support live upgrades from older systemd
828 * versions where PID 1 was moved there. */
9444b1f2
LP
829 if (m->running_as == SYSTEMD_SYSTEM) {
830 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
15c60e99
LP
831 if (!e)
832 e = endswith(m->cgroup_root, "/system");
9444b1f2
LP
833 if (e)
834 *e = 0;
0baf24dd 835 }
7ccfb64a 836
9444b1f2
LP
837 /* And make sure to store away the root value without trailing
838 * slash, even for the root dir, so that we can easily prepend
839 * it everywhere. */
840 if (streq(m->cgroup_root, "/"))
841 m->cgroup_root[0] = 0;
8e274523 842
35d2e7ec 843 /* 2. Show data */
9444b1f2 844 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
3474ae3c 845 if (r < 0) {
12235040 846 log_error("Cannot find cgroup mount point: %s", strerror(-r));
a32360f1 847 return r;
12235040 848 }
8e274523 849
c6c18be3
LP
850 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
851
35d2e7ec 852 /* 3. Install agent */
a32360f1
LP
853 if (m->running_as == SYSTEMD_SYSTEM) {
854 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
855 if (r < 0)
856 log_warning("Failed to install release agent, ignoring: %s", strerror(-r));
857 else if (r > 0)
858 log_debug("Installed release agent.");
859 else
860 log_debug("Release agent already installed.");
861 }
8e274523 862
15c60e99
LP
863 /* 4. Make sure we are in the root cgroup */
864 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
9156e799 865 if (r < 0) {
8e274523 866 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r));
a32360f1 867 return r;
c6c18be3
LP
868 }
869
35d2e7ec 870 /* 5. And pin it, so that it cannot be unmounted */
03e334a1 871 safe_close(m->pin_cgroupfs_fd);
c6c18be3 872
9156e799
LP
873 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
874 if (r < 0) {
12235040 875 log_error("Failed to open pin file: %m");
a32360f1 876 return -errno;
c6c18be3
LP
877 }
878
4ad49000
LP
879 /* 6. Figure out which controllers are supported */
880 m->cgroup_supported = cg_mask_supported();
9156e799 881
e58cec11
LP
882 /* 7. Always enable hierarchial support if it exists... */
883 cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
884
a32360f1 885 return 0;
8e274523
LP
886}
887
c6c18be3 888void manager_shutdown_cgroup(Manager *m, bool delete) {
8e274523
LP
889 assert(m);
890
9444b1f2
LP
891 /* We can't really delete the group, since we are in it. But
892 * let's trim it. */
893 if (delete && m->cgroup_root)
894 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
8e274523 895
03e334a1 896 m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd);
c6c18be3 897
9444b1f2
LP
898 free(m->cgroup_root);
899 m->cgroup_root = NULL;
8e274523
LP
900}
901
4ad49000 902Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
acb14d31 903 char *p;
4ad49000 904 Unit *u;
acb14d31
LP
905
906 assert(m);
907 assert(cgroup);
acb14d31 908
4ad49000
LP
909 u = hashmap_get(m->cgroup_unit, cgroup);
910 if (u)
911 return u;
acb14d31 912
8e70580b 913 p = strdupa(cgroup);
acb14d31
LP
914 for (;;) {
915 char *e;
916
917 e = strrchr(p, '/');
4ad49000
LP
918 if (e == p || !e)
919 return NULL;
acb14d31
LP
920
921 *e = 0;
922
4ad49000
LP
923 u = hashmap_get(m->cgroup_unit, p);
924 if (u)
925 return u;
acb14d31
LP
926 }
927}
928
4ad49000
LP
929Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
930 _cleanup_free_ char *cgroup = NULL;
acb14d31 931 int r;
8e274523 932
8c47c732
LP
933 assert(m);
934
935 if (pid <= 1)
936 return NULL;
937
4ad49000
LP
938 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
939 if (r < 0)
6dde1f33
LP
940 return NULL;
941
4ad49000 942 return manager_get_unit_by_cgroup(m, cgroup);
6dde1f33 943}
4fbf50b3 944
4ad49000
LP
945int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
946 Unit *u;
947 int r;
4fbf50b3 948
4ad49000
LP
949 assert(m);
950 assert(cgroup);
4fbf50b3 951
4ad49000 952 u = manager_get_unit_by_cgroup(m, cgroup);
b56c28c3 953 if (u) {
06025d91
LP
954 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
955 if (r > 0) {
956 if (UNIT_VTABLE(u)->notify_cgroup_empty)
957 UNIT_VTABLE(u)->notify_cgroup_empty(u);
b56c28c3 958
06025d91
LP
959 unit_add_to_gc_queue(u);
960 }
b56c28c3 961 }
2633eb83 962
4ad49000 963 return 0;
4fbf50b3
LP
964}
965
4ad49000
LP
966static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
967 [CGROUP_AUTO] = "auto",
968 [CGROUP_CLOSED] = "closed",
969 [CGROUP_STRICT] = "strict",
970};
4fbf50b3 971
4ad49000 972DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);