]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/cgroup.c
cgroup: readd proper cgroup empty tracking
[thirdparty/systemd.git] / src / core / cgroup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
4ad49000 6 Copyright 2013 Lennart Poettering
8e274523
LP
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
c6c18be3 22#include <fcntl.h>
8c6db833 23
9eb977db 24#include "path-util.h"
9444b1f2 25#include "special.h"
4ad49000
LP
26#include "cgroup-util.h"
27#include "cgroup.h"
8e274523 28
4ad49000
LP
29void cgroup_context_init(CGroupContext *c) {
30 assert(c);
31
32 /* Initialize everything to the kernel defaults, assuming the
33 * structure is preinitialized to 0 */
34
35 c->cpu_shares = 1024;
36 c->memory_limit = c->memory_soft_limit = (uint64_t) -1;
37 c->blockio_weight = 1000;
38}
8e274523 39
4ad49000
LP
40void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
41 assert(c);
42 assert(a);
43
44 LIST_REMOVE(CGroupDeviceAllow, device_allow, c->device_allow, a);
45 free(a->path);
46 free(a);
47}
48
49void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
50 assert(c);
51 assert(w);
52
53 LIST_REMOVE(CGroupBlockIODeviceWeight, device_weights, c->blockio_device_weights, w);
54 free(w->path);
55 free(w);
56}
57
58void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
59 assert(c);
8e274523 60 assert(b);
8e274523 61
4ad49000
LP
62 LIST_REMOVE(CGroupBlockIODeviceBandwidth, device_bandwidths, c->blockio_device_bandwidths, b);
63 free(b->path);
64 free(b);
65}
66
67void cgroup_context_done(CGroupContext *c) {
68 assert(c);
69
70 while (c->blockio_device_weights)
71 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
72
73 while (c->blockio_device_bandwidths)
74 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
75
76 while (c->device_allow)
77 cgroup_context_free_device_allow(c, c->device_allow);
78}
79
80void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
81 CGroupBlockIODeviceBandwidth *b;
82 CGroupBlockIODeviceWeight *w;
83 CGroupDeviceAllow *a;
84
85 assert(c);
86 assert(f);
87
88 prefix = strempty(prefix);
89
90 fprintf(f,
91 "%sCPUAccounting=%s\n"
92 "%sBlockIOAccounting=%s\n"
93 "%sMemoryAccounting=%s\n"
94 "%sCPUShares=%lu\n"
95 "%sBlockIOWeight%lu\n"
96 "%sMemoryLimit=%" PRIu64 "\n"
97 "%sMemorySoftLimit=%" PRIu64 "\n"
98 "%sDevicePolicy=%s\n",
99 prefix, yes_no(c->cpu_accounting),
100 prefix, yes_no(c->blockio_accounting),
101 prefix, yes_no(c->memory_accounting),
102 prefix, c->cpu_shares,
103 prefix, c->blockio_weight,
104 prefix, c->memory_limit,
105 prefix, c->memory_soft_limit,
106 prefix, cgroup_device_policy_to_string(c->device_policy));
107
108 LIST_FOREACH(device_allow, a, c->device_allow)
109 fprintf(f,
110 "%sDeviceAllow=%s %s%s%s\n",
111 prefix,
112 a->path,
113 a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
114
115 LIST_FOREACH(device_weights, w, c->blockio_device_weights)
116 fprintf(f,
117 "%sBlockIOWeight=%s %lu",
118 prefix,
119 w->path,
120 w->weight);
121
122 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
123 char buf[FORMAT_BYTES_MAX];
124
125 fprintf(f,
126 "%s%s=%s %s\n",
127 prefix,
128 b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
129 b->path,
130 format_bytes(buf, sizeof(buf), b->bandwidth));
131 }
132}
133
/* Resolves the path 'p' to the device number to use for blkio settings.
 *
 * If 'p' is a block device node its own device number is stored in
 * '*dev'. Otherwise the device holding the file is used, and
 * block_get_whole_disk() is then asked to replace it with the whole-disk
 * device (its result is ignored, so '*dev' keeps the backing device if
 * that lookup does not change it).
 *
 * Returns 0 on success, -errno if stat() fails, -ENODEV if no backing
 * device can be determined. */
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;

        assert(p);
        assert(dev);

        if (stat(p, &st) < 0) {
                log_warning("Couldn't stat device %s: %m", p);
                return -errno;
        }

        /* A block device node names the device directly */
        if (S_ISBLK(st.st_mode)) {
                *dev = st.st_rdev;
                return 0;
        }

        /* A zero major number means there is no backing device to use */
        if (major(st.st_dev) == 0) {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        /* Not a device node: start from the device the file is stored
         * on, then try to step up from a partition to the originating
         * block device */
        *dev = st.st_dev;
        block_get_whole_disk(*dev, dev);

        return 0;
}
164
4ad49000
LP
165static int whitelist_device(const char *path, const char *node, const char *acc) {
166 char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
167 struct stat st;
8c6db833 168 int r;
8e274523 169
4ad49000
LP
170 assert(path);
171 assert(acc);
8e274523 172
4ad49000
LP
173 if (stat(node, &st) < 0) {
174 log_warning("Couldn't stat device %s", node);
175 return -errno;
176 }
177
178 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
179 log_warning("%s is not a device.", node);
180 return -ENODEV;
181 }
182
183 sprintf(buf,
184 "%c %u:%u %s",
185 S_ISCHR(st.st_mode) ? 'c' : 'b',
186 major(st.st_rdev), minor(st.st_rdev),
187 acc);
188
189 r = cg_set_attribute("devices", path, "devices.allow", buf);
190 if (r < 0)
191 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
192
193 return r;
8e274523
LP
194}
195
4ad49000
LP
196void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path) {
197 int r;
198
199 assert(c);
200 assert(path);
8e274523 201
4ad49000
LP
202 if (mask == 0)
203 return;
8e274523 204
4ad49000
LP
205 if (mask & CGROUP_CPU) {
206 char buf[DECIMAL_STR_MAX(unsigned long) + 1];
8e274523 207
4ad49000
LP
208 sprintf(buf, "%lu\n", c->cpu_shares);
209 r = cg_set_attribute("cpu", path, "cpu.shares", buf);
210 if (r < 0)
211 log_warning("Failed to set cpu.shares on %s: %s", path, strerror(-r));
212 }
213
214 if (mask & CGROUP_BLKIO) {
215 char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
216 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
217 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
218 CGroupBlockIODeviceWeight *w;
219 CGroupBlockIODeviceBandwidth *b;
220
221 sprintf(buf, "%lu\n", c->blockio_weight);
222 r = cg_set_attribute("blkio", path, "blkio.weight", buf);
223 if (r < 0)
224 log_warning("Failed to set blkio.weight on %s: %s", path, strerror(-r));
225
226 /* FIXME: no way to reset this list */
227 LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
228 dev_t dev;
229
230 r = lookup_blkio_device(w->path, &dev);
231 if (r < 0)
232 continue;
8e274523 233
4ad49000
LP
234 sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
235 r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
236 if (r < 0)
237 log_error("Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
238 }
239
240 /* FIXME: no way to reset this list */
241 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
242 const char *a;
243 dev_t dev;
244
245 r = lookup_blkio_device(b->path, &dev);
246 if (r < 0)
247 continue;
248
249 a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
250
251 sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
252 r = cg_set_attribute("blkio", path, a, buf);
253 if (r < 0)
254 log_error("Failed to set %s on %s: %s", a, path, strerror(-r));
d686d8a9 255 }
8e274523
LP
256 }
257
4ad49000
LP
258 if (mask & CGROUP_MEMORY) {
259 char buf[DECIMAL_STR_MAX(uint64_t) + 1];
8e274523 260
4ad49000
LP
261 sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
262 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
263 if (r < 0)
264 log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
8e274523 265
4ad49000
LP
266 sprintf(buf, "%" PRIu64 "\n", c->memory_soft_limit);
267 cg_set_attribute("memory", path, "memory.soft_limit_in_bytes", buf);
268 if (r < 0)
269 log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
270 }
8e274523 271
4ad49000
LP
272 if (mask & CGROUP_DEVICE) {
273 CGroupDeviceAllow *a;
8e274523 274
4ad49000
LP
275 if (c->device_allow || c->device_policy != CGROUP_AUTO)
276 r = cg_set_attribute("devices", path, "devices.deny", "a");
277 else
278 r = cg_set_attribute("devices", path, "devices.allow", "a");
279 if (r < 0)
280 log_error("Failed to reset devices.list on %s: %s", path, strerror(-r));
fb385181 281
4ad49000
LP
282 if (c->device_policy == CGROUP_CLOSED ||
283 (c->device_policy == CGROUP_AUTO && c->device_allow)) {
284 static const char auto_devices[] =
285 "/dev/null\0" "rw\0"
286 "/dev/zero\0" "rw\0"
287 "/dev/full\0" "rw\0"
288 "/dev/random\0" "rw\0"
289 "/dev/urandom\0" "rw\0";
290
291 const char *x, *y;
292
293 NULSTR_FOREACH_PAIR(x, y, auto_devices)
294 whitelist_device(path, x, y);
295 }
296
297 LIST_FOREACH(device_allow, a, c->device_allow) {
298 char acc[4];
299 unsigned k = 0;
300
301 if (a->r)
302 acc[k++] = 'r';
303 if (a->w)
304 acc[k++] = 'w';
305 if (a->m)
306 acc[k++] = 'm';
fb385181 307
4ad49000
LP
308 if (k == 0)
309 continue;
fb385181 310
4ad49000
LP
311 acc[k++] = 0;
312 whitelist_device(path, a->path, acc);
313 }
314 }
fb385181
LP
315}
316
4ad49000
LP
317CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
318 CGroupControllerMask mask = 0;
8e274523 319
4ad49000 320 /* Figure out which controllers we need */
8e274523 321
4ad49000
LP
322 if (c->cpu_accounting || c->cpu_shares != 1024)
323 mask |= CGROUP_CPUACCT | CGROUP_CPU;
ecedd90f 324
4ad49000
LP
325 if (c->blockio_accounting ||
326 c->blockio_weight != 1000 ||
327 c->blockio_device_weights ||
328 c->blockio_device_bandwidths)
329 mask |= CGROUP_BLKIO;
ecedd90f 330
4ad49000
LP
331 if (c->memory_accounting ||
332 c->memory_limit != (uint64_t) -1 ||
333 c->memory_soft_limit != (uint64_t) -1)
334 mask |= CGROUP_MEMORY;
8e274523 335
4ad49000
LP
336 if (c->device_allow || c->device_policy != CGROUP_AUTO)
337 mask |= CGROUP_DEVICE;
338
339 return mask;
8e274523
LP
340}
341
4ad49000
LP
342static CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
343 CGroupContext *c;
8e274523 344
4ad49000
LP
345 c = unit_get_cgroup_context(u);
346 if (!c)
347 return 0;
8e274523 348
4ad49000 349 return cgroup_context_get_mask(c);
8e274523
LP
350}
351
4ad49000
LP
352static CGroupControllerMask unit_get_members_mask(Unit *u) {
353 CGroupControllerMask mask = 0;
354 Unit *m;
355 Iterator i;
246aa6dd 356
4ad49000 357 assert(u);
246aa6dd 358
4ad49000 359 SET_FOREACH(m, u->dependencies[UNIT_BEFORE], i) {
246aa6dd 360
4ad49000 361 if (UNIT_DEREF(m->slice) != u)
246aa6dd
LP
362 continue;
363
4ad49000 364 mask |= unit_get_cgroup_mask(m) | unit_get_members_mask(m);
246aa6dd
LP
365 }
366
4ad49000 367 return mask;
246aa6dd
LP
368}
369
4ad49000
LP
370static CGroupControllerMask unit_get_siblings_mask(Unit *u) {
371 assert(u);
246aa6dd 372
4ad49000
LP
373 if (!UNIT_ISSET(u->slice))
374 return 0;
375
376 /* Sibling propagation is only relevant for weight-based
377 * controllers, so let's mask out everything else */
378 return unit_get_members_mask(UNIT_DEREF(u->slice)) &
379 (CGROUP_CPU|CGROUP_BLKIO|CGROUP_CPUACCT);
246aa6dd
LP
380}
381
4ad49000
LP
382static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
383 char *path = NULL;
384 int r;
64747e2d 385
4ad49000 386 assert(u);
64747e2d 387
4ad49000
LP
388 path = unit_default_cgroup_path(u);
389 if (!path)
390 return -ENOMEM;
64747e2d 391
0a1eb06d
LP
392 r = hashmap_put(u->manager->cgroup_unit, path, u);
393 if (r < 0)
394 return r;
395
4ad49000
LP
396 /* First, create our own group */
397 r = cg_create_with_mask(mask, path);
398 if (r < 0)
399 log_error("Failed to create cgroup %s: %s", path, strerror(-r));
64747e2d 400
4ad49000
LP
401 /* Then, possibly move things over */
402 if (u->cgroup_path && !streq(path, u->cgroup_path)) {
403 r = cg_migrate_with_mask(mask, u->cgroup_path, path);
64747e2d 404 if (r < 0)
4ad49000 405 log_error("Failed to migrate cgroup %s: %s", path, strerror(-r));
64747e2d
LP
406 }
407
4ad49000
LP
408 /* And remember the new data */
409 free(u->cgroup_path);
410 u->cgroup_path = path;
411 u->cgroup_realized = true;
412 u->cgroup_mask = mask;
413
64747e2d
LP
414 return 0;
415}
416
0a1eb06d 417static int unit_realize_cgroup_now(Unit *u) {
4ad49000 418 CGroupControllerMask mask;
64747e2d 419
4ad49000 420 assert(u);
64747e2d 421
4ad49000
LP
422 if (u->in_cgroup_queue) {
423 LIST_REMOVE(Unit, cgroup_queue, u->manager->cgroup_queue, u);
424 u->in_cgroup_queue = false;
425 }
64747e2d 426
4ad49000
LP
427 mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
428 mask &= u->manager->cgroup_supported;
64747e2d 429
4ad49000
LP
430 if (u->cgroup_realized &&
431 u->cgroup_mask == mask)
0a1eb06d 432 return 0;
64747e2d 433
4ad49000
LP
434 /* First, realize parents */
435 if (UNIT_ISSET(u->slice))
436 unit_realize_cgroup_now(UNIT_DEREF(u->slice));
437
438 /* And then do the real work */
0a1eb06d 439 return unit_create_cgroups(u, mask);
64747e2d
LP
440}
441
4ad49000 442static void unit_add_to_cgroup_queue(Unit *u) {
ecedd90f 443
4ad49000
LP
444 if (u->in_cgroup_queue)
445 return;
8e274523 446
4ad49000
LP
447 LIST_PREPEND(Unit, cgroup_queue, u->manager->cgroup_queue, u);
448 u->in_cgroup_queue = true;
449}
8c6db833 450
4ad49000
LP
451unsigned manager_dispatch_cgroup_queue(Manager *m) {
452 Unit *i;
453 unsigned n = 0;
ecedd90f 454
4ad49000
LP
455 while ((i = m->cgroup_queue)) {
456 assert(i->in_cgroup_queue);
ecedd90f 457
0a1eb06d
LP
458 if (unit_realize_cgroup_now(i) >= 0)
459 cgroup_context_apply(unit_get_cgroup_context(i), i->cgroup_mask, i->cgroup_path);
460
4ad49000
LP
461 n++;
462 }
ecedd90f 463
4ad49000 464 return n;
8e274523
LP
465}
466
4ad49000
LP
467static void unit_queue_siblings(Unit *u) {
468 Unit *slice;
ca949c9d 469
4ad49000
LP
470 /* This adds the siblings of the specified unit and the
471 * siblings of all parent units to the cgroup queue. (But
472 * neither the specified unit itself nor the parents.) */
473
474 while ((slice = UNIT_DEREF(u->slice))) {
475 Iterator i;
476 Unit *m;
8f53a7b8 477
4ad49000
LP
478 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
479 if (m == u)
480 continue;
8e274523 481
4ad49000 482 if (UNIT_DEREF(m->slice) != slice)
50159e6a 483 continue;
8e274523 484
4ad49000 485 unit_add_to_cgroup_queue(m);
50159e6a
LP
486 }
487
4ad49000 488 u = slice;
8e274523 489 }
4ad49000
LP
490}
491
0a1eb06d 492int unit_realize_cgroup(Unit *u) {
4ad49000 493 CGroupContext *c;
0a1eb06d 494 int r;
4ad49000
LP
495
496 assert(u);
497
498 c = unit_get_cgroup_context(u);
499 if (!c)
0a1eb06d 500 return 0;
8e274523 501
4ad49000
LP
502 /* So, here's the deal: when realizing the cgroups for this
503 * unit, we need to first create all parents, but there's more
504 * actually: for the weight-based controllers we also need to
505 * make sure that all our siblings (i.e. units that are in the
506 * same slice as we are) have cgroup too. Otherwise things
507 * would become very uneven as each of their processes would
508 * get as much resources as all our group together. This call
509 * will synchronously create the parent cgroups, but will
510 * defer work on the siblings to the next event loop
511 * iteration. */
ca949c9d 512
4ad49000
LP
513 /* Add all sibling slices to the cgroup queue. */
514 unit_queue_siblings(u);
515
516 /* And realize this one now */
0a1eb06d 517 r = unit_realize_cgroup_now(u);
4ad49000
LP
518
519 /* And apply the values */
0a1eb06d
LP
520 if (r >= 0)
521 cgroup_context_apply(c, u->cgroup_mask, u->cgroup_path);
522
523 return r;
8e274523
LP
524}
525
4ad49000 526void unit_destroy_cgroup(Unit *u) {
8e274523
LP
527 int r;
528
4ad49000 529 assert(u);
8e274523 530
4ad49000
LP
531 if (!u->cgroup_path)
532 return;
8e274523 533
4ad49000
LP
534 r = cg_trim_with_mask(u->cgroup_mask, u->cgroup_path, true);
535 if (r < 0)
536 log_error("Failed to destroy cgroup %s: %s", u->cgroup_path, strerror(-r));
8e274523 537
0a1eb06d
LP
538 hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
539
4ad49000
LP
540 free(u->cgroup_path);
541 u->cgroup_path = NULL;
542 u->cgroup_realized = false;
543 u->cgroup_mask = 0;
0a1eb06d 544
8e274523
LP
545}
546
4ad49000
LP
547pid_t unit_search_main_pid(Unit *u) {
548 _cleanup_fclose_ FILE *f = NULL;
549 pid_t pid = 0, npid, mypid;
550
551 assert(u);
552
553 if (!u->cgroup_path)
554 return 0;
555
556 if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
557 return 0;
558
559 mypid = getpid();
560 while (cg_read_pid(f, &npid) > 0) {
561 pid_t ppid;
562
563 if (npid == pid)
564 continue;
8e274523 565
4ad49000
LP
566 /* Ignore processes that aren't our kids */
567 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
568 continue;
8e274523 569
4ad49000
LP
570 if (pid != 0) {
571 /* Dang, there's more than one daemonized PID
572 in this group, so we don't know what process
573 is the main process. */
574 pid = 0;
575 break;
576 }
8e274523 577
4ad49000 578 pid = npid;
8e274523
LP
579 }
580
4ad49000 581 return pid;
8e274523
LP
582}
583
8e274523 584int manager_setup_cgroup(Manager *m) {
9444b1f2 585 _cleanup_free_ char *path = NULL;
8e274523 586 int r;
9444b1f2 587 char *e, *a;
8e274523
LP
588
589 assert(m);
590
e5a53dc7 591 /* 0. Be nice to Ingo Molnar #628004 */
0c85a4f3 592 if (path_is_mount_point("/sys/fs/cgroup/systemd", false) <= 0) {
e5a53dc7
LP
593 log_warning("No control group support available, not creating root group.");
594 return 0;
595 }
596
35d2e7ec 597 /* 1. Determine hierarchy */
9444b1f2
LP
598 free(m->cgroup_root);
599 m->cgroup_root = NULL;
600
601 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
9156e799 602 if (r < 0) {
12235040 603 log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
a32360f1 604 return r;
12235040 605 }
8e274523 606
9444b1f2
LP
607 /* Already in /system.slice? If so, let's cut this off again */
608 if (m->running_as == SYSTEMD_SYSTEM) {
609 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
610 if (e)
611 *e = 0;
0baf24dd 612 }
7ccfb64a 613
9444b1f2
LP
614 /* And make sure to store away the root value without trailing
615 * slash, even for the root dir, so that we can easily prepend
616 * it everywhere. */
617 if (streq(m->cgroup_root, "/"))
618 m->cgroup_root[0] = 0;
8e274523 619
35d2e7ec 620 /* 2. Show data */
9444b1f2 621 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
3474ae3c 622 if (r < 0) {
12235040 623 log_error("Cannot find cgroup mount point: %s", strerror(-r));
a32360f1 624 return r;
12235040 625 }
8e274523 626
c6c18be3
LP
627 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
628
35d2e7ec 629 /* 3. Install agent */
a32360f1
LP
630 if (m->running_as == SYSTEMD_SYSTEM) {
631 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
632 if (r < 0)
633 log_warning("Failed to install release agent, ignoring: %s", strerror(-r));
634 else if (r > 0)
635 log_debug("Installed release agent.");
636 else
637 log_debug("Release agent already installed.");
638 }
8e274523 639
9444b1f2
LP
640 /* 4. Realize the system slice and put us in there */
641 a = strappenda(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
642 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, a, 0);
9156e799 643 if (r < 0) {
8e274523 644 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r));
a32360f1 645 return r;
c6c18be3
LP
646 }
647
35d2e7ec 648 /* 5. And pin it, so that it cannot be unmounted */
c6c18be3
LP
649 if (m->pin_cgroupfs_fd >= 0)
650 close_nointr_nofail(m->pin_cgroupfs_fd);
651
9156e799
LP
652 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
653 if (r < 0) {
12235040 654 log_error("Failed to open pin file: %m");
a32360f1 655 return -errno;
c6c18be3
LP
656 }
657
4ad49000
LP
658 /* 6. Figure out which controllers are supported */
659 m->cgroup_supported = cg_mask_supported();
9156e799 660
a32360f1 661 return 0;
8e274523
LP
662}
663
c6c18be3 664void manager_shutdown_cgroup(Manager *m, bool delete) {
8e274523
LP
665 assert(m);
666
9444b1f2
LP
667 /* We can't really delete the group, since we are in it. But
668 * let's trim it. */
669 if (delete && m->cgroup_root)
670 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
8e274523 671
c6c18be3
LP
672 if (m->pin_cgroupfs_fd >= 0) {
673 close_nointr_nofail(m->pin_cgroupfs_fd);
674 m->pin_cgroupfs_fd = -1;
675 }
676
9444b1f2
LP
677 free(m->cgroup_root);
678 m->cgroup_root = NULL;
8e274523
LP
679}
680
4ad49000 681Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
acb14d31 682 char *p;
4ad49000 683 Unit *u;
acb14d31
LP
684
685 assert(m);
686 assert(cgroup);
acb14d31 687
4ad49000
LP
688 u = hashmap_get(m->cgroup_unit, cgroup);
689 if (u)
690 return u;
acb14d31 691
8e70580b 692 p = strdupa(cgroup);
acb14d31
LP
693 for (;;) {
694 char *e;
695
696 e = strrchr(p, '/');
4ad49000
LP
697 if (e == p || !e)
698 return NULL;
acb14d31
LP
699
700 *e = 0;
701
4ad49000
LP
702 u = hashmap_get(m->cgroup_unit, p);
703 if (u)
704 return u;
acb14d31
LP
705 }
706}
707
4ad49000
LP
708Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
709 _cleanup_free_ char *cgroup = NULL;
acb14d31 710 int r;
8e274523 711
8c47c732
LP
712 assert(m);
713
714 if (pid <= 1)
715 return NULL;
716
4ad49000
LP
717 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
718 if (r < 0)
6dde1f33
LP
719 return NULL;
720
4ad49000 721 return manager_get_unit_by_cgroup(m, cgroup);
6dde1f33 722}
4fbf50b3 723
4ad49000
LP
724int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
725 Unit *u;
726 int r;
4fbf50b3 727
4ad49000
LP
728 assert(m);
729 assert(cgroup);
4fbf50b3 730
4ad49000
LP
731 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, cgroup, true);
732 if (r == 0)
4fbf50b3
LP
733 return 0;
734
4ad49000
LP
735 u = manager_get_unit_by_cgroup(m, cgroup);
736 if (u && UNIT_VTABLE(u)->notify_cgroup_empty)
737 UNIT_VTABLE(u)->notify_cgroup_empty(u);
2633eb83 738
4ad49000 739 return 0;
4fbf50b3
LP
740}
741
4ad49000
LP
742static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
743 [CGROUP_AUTO] = "auto",
744 [CGROUP_CLOSED] = "closed",
745 [CGROUP_STRICT] = "strict",
746};
4fbf50b3 747
4ad49000 748DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);