]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/cgroup.c
swap: create .wants symlink to 'auto' swap devices
[thirdparty/systemd.git] / src / core / cgroup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
4ad49000 6 Copyright 2013 Lennart Poettering
8e274523
LP
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
c6c18be3 22#include <fcntl.h>
8c6db833 23
9eb977db 24#include "path-util.h"
9444b1f2 25#include "special.h"
4ad49000
LP
26#include "cgroup-util.h"
27#include "cgroup.h"
8e274523 28
4ad49000
LP
29void cgroup_context_init(CGroupContext *c) {
30 assert(c);
31
32 /* Initialize everything to the kernel defaults, assuming the
33 * structure is preinitialized to 0 */
34
35 c->cpu_shares = 1024;
36 c->memory_limit = c->memory_soft_limit = (uint64_t) -1;
37 c->blockio_weight = 1000;
38}
8e274523 39
4ad49000
LP
40void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
41 assert(c);
42 assert(a);
43
44 LIST_REMOVE(CGroupDeviceAllow, device_allow, c->device_allow, a);
45 free(a->path);
46 free(a);
47}
48
49void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
50 assert(c);
51 assert(w);
52
53 LIST_REMOVE(CGroupBlockIODeviceWeight, device_weights, c->blockio_device_weights, w);
54 free(w->path);
55 free(w);
56}
57
58void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
59 assert(c);
8e274523 60 assert(b);
8e274523 61
4ad49000
LP
62 LIST_REMOVE(CGroupBlockIODeviceBandwidth, device_bandwidths, c->blockio_device_bandwidths, b);
63 free(b->path);
64 free(b);
65}
66
67void cgroup_context_done(CGroupContext *c) {
68 assert(c);
69
70 while (c->blockio_device_weights)
71 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
72
73 while (c->blockio_device_bandwidths)
74 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
75
76 while (c->device_allow)
77 cgroup_context_free_device_allow(c, c->device_allow);
78}
79
80void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
81 CGroupBlockIODeviceBandwidth *b;
82 CGroupBlockIODeviceWeight *w;
83 CGroupDeviceAllow *a;
84
85 assert(c);
86 assert(f);
87
88 prefix = strempty(prefix);
89
90 fprintf(f,
91 "%sCPUAccounting=%s\n"
92 "%sBlockIOAccounting=%s\n"
93 "%sMemoryAccounting=%s\n"
94 "%sCPUShares=%lu\n"
95 "%sBlockIOWeight%lu\n"
96 "%sMemoryLimit=%" PRIu64 "\n"
97 "%sMemorySoftLimit=%" PRIu64 "\n"
98 "%sDevicePolicy=%s\n",
99 prefix, yes_no(c->cpu_accounting),
100 prefix, yes_no(c->blockio_accounting),
101 prefix, yes_no(c->memory_accounting),
102 prefix, c->cpu_shares,
103 prefix, c->blockio_weight,
104 prefix, c->memory_limit,
105 prefix, c->memory_soft_limit,
106 prefix, cgroup_device_policy_to_string(c->device_policy));
107
108 LIST_FOREACH(device_allow, a, c->device_allow)
109 fprintf(f,
110 "%sDeviceAllow=%s %s%s%s\n",
111 prefix,
112 a->path,
113 a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
114
115 LIST_FOREACH(device_weights, w, c->blockio_device_weights)
116 fprintf(f,
8e7076ca 117 "%sBlockIODeviceWeight=%s %lu",
4ad49000
LP
118 prefix,
119 w->path,
120 w->weight);
121
122 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
123 char buf[FORMAT_BYTES_MAX];
124
125 fprintf(f,
126 "%s%s=%s %s\n",
127 prefix,
128 b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
129 b->path,
130 format_bytes(buf, sizeof(buf), b->bandwidth));
131 }
132}
133
/* Determines the block device number to use for blkio settings on
 * path p and stores it in *dev.
 *
 * If p is a block device node, its own device number is used.
 * Otherwise the device the file is stored on is taken, and
 * block_get_whole_disk() is asked to replace it with the originating
 * block device (its result is not checked here).
 *
 * Returns 0 on success, -errno if stat() fails, and -ENODEV if the
 * backing device has major number 0. Failures are logged as warnings. */
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;

        assert(p);
        assert(dev);

        if (stat(p, &st) < 0) {
                log_warning("Couldn't stat device %s: %m", p);
                return -errno;
        }

        /* A device node: use the device it refers to */
        if (S_ISBLK(st.st_mode)) {
                *dev = st.st_rdev;
                return 0;
        }

        if (major(st.st_dev) == 0) {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        /* Not a device node, hence use the block device this file
         * is stored on ... */
        *dev = st.st_dev;

        /* ... and if that is a partition, try to get the
         * originating block device */
        block_get_whole_disk(st.st_dev, dev);

        return 0;
}
164
4ad49000
LP
165static int whitelist_device(const char *path, const char *node, const char *acc) {
166 char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
167 struct stat st;
8c6db833 168 int r;
8e274523 169
4ad49000
LP
170 assert(path);
171 assert(acc);
8e274523 172
4ad49000
LP
173 if (stat(node, &st) < 0) {
174 log_warning("Couldn't stat device %s", node);
175 return -errno;
176 }
177
178 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
179 log_warning("%s is not a device.", node);
180 return -ENODEV;
181 }
182
183 sprintf(buf,
184 "%c %u:%u %s",
185 S_ISCHR(st.st_mode) ? 'c' : 'b',
186 major(st.st_rdev), minor(st.st_rdev),
187 acc);
188
189 r = cg_set_attribute("devices", path, "devices.allow", buf);
190 if (r < 0)
191 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
192
193 return r;
8e274523
LP
194}
195
4ad49000
LP
196void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path) {
197 int r;
198
199 assert(c);
200 assert(path);
8e274523 201
4ad49000
LP
202 if (mask == 0)
203 return;
8e274523 204
4ad49000
LP
205 if (mask & CGROUP_CPU) {
206 char buf[DECIMAL_STR_MAX(unsigned long) + 1];
8e274523 207
4ad49000
LP
208 sprintf(buf, "%lu\n", c->cpu_shares);
209 r = cg_set_attribute("cpu", path, "cpu.shares", buf);
210 if (r < 0)
211 log_warning("Failed to set cpu.shares on %s: %s", path, strerror(-r));
212 }
213
214 if (mask & CGROUP_BLKIO) {
215 char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
216 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
217 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
218 CGroupBlockIODeviceWeight *w;
219 CGroupBlockIODeviceBandwidth *b;
220
221 sprintf(buf, "%lu\n", c->blockio_weight);
222 r = cg_set_attribute("blkio", path, "blkio.weight", buf);
223 if (r < 0)
224 log_warning("Failed to set blkio.weight on %s: %s", path, strerror(-r));
225
226 /* FIXME: no way to reset this list */
227 LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
228 dev_t dev;
229
230 r = lookup_blkio_device(w->path, &dev);
231 if (r < 0)
232 continue;
8e274523 233
4ad49000
LP
234 sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
235 r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
236 if (r < 0)
237 log_error("Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
238 }
239
240 /* FIXME: no way to reset this list */
241 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
242 const char *a;
243 dev_t dev;
244
245 r = lookup_blkio_device(b->path, &dev);
246 if (r < 0)
247 continue;
248
249 a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
250
251 sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
252 r = cg_set_attribute("blkio", path, a, buf);
253 if (r < 0)
254 log_error("Failed to set %s on %s: %s", a, path, strerror(-r));
d686d8a9 255 }
8e274523
LP
256 }
257
4ad49000
LP
258 if (mask & CGROUP_MEMORY) {
259 char buf[DECIMAL_STR_MAX(uint64_t) + 1];
6a94f2e9
G
260 if (c->memory_limit != (uint64_t) -1) {
261 sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
262 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
263 } else
264 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
8e274523 265
4ad49000
LP
266 if (r < 0)
267 log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
8e274523 268
6a94f2e9
G
269 if (c->memory_soft_limit != (uint64_t) -1) {
270 sprintf(buf, "%" PRIu64 "\n", c->memory_soft_limit);
271 r = cg_set_attribute("memory", path, "memory.soft_limit_in_bytes", buf);
272 } else
273 r = cg_set_attribute("memory", path, "memory.soft_limit_in_bytes", "-1");
274
4ad49000 275 if (r < 0)
84121bc2 276 log_error("Failed to set memory.soft_limit_in_bytes on %s: %s", path, strerror(-r));
4ad49000 277 }
8e274523 278
4ad49000
LP
279 if (mask & CGROUP_DEVICE) {
280 CGroupDeviceAllow *a;
8e274523 281
4ad49000
LP
282 if (c->device_allow || c->device_policy != CGROUP_AUTO)
283 r = cg_set_attribute("devices", path, "devices.deny", "a");
284 else
285 r = cg_set_attribute("devices", path, "devices.allow", "a");
286 if (r < 0)
287 log_error("Failed to reset devices.list on %s: %s", path, strerror(-r));
fb385181 288
4ad49000
LP
289 if (c->device_policy == CGROUP_CLOSED ||
290 (c->device_policy == CGROUP_AUTO && c->device_allow)) {
291 static const char auto_devices[] =
292 "/dev/null\0" "rw\0"
293 "/dev/zero\0" "rw\0"
294 "/dev/full\0" "rw\0"
295 "/dev/random\0" "rw\0"
296 "/dev/urandom\0" "rw\0";
297
298 const char *x, *y;
299
300 NULSTR_FOREACH_PAIR(x, y, auto_devices)
301 whitelist_device(path, x, y);
302 }
303
304 LIST_FOREACH(device_allow, a, c->device_allow) {
305 char acc[4];
306 unsigned k = 0;
307
308 if (a->r)
309 acc[k++] = 'r';
310 if (a->w)
311 acc[k++] = 'w';
312 if (a->m)
313 acc[k++] = 'm';
fb385181 314
4ad49000
LP
315 if (k == 0)
316 continue;
fb385181 317
4ad49000
LP
318 acc[k++] = 0;
319 whitelist_device(path, a->path, acc);
320 }
321 }
fb385181
LP
322}
323
4ad49000
LP
324CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
325 CGroupControllerMask mask = 0;
8e274523 326
4ad49000 327 /* Figure out which controllers we need */
8e274523 328
4ad49000
LP
329 if (c->cpu_accounting || c->cpu_shares != 1024)
330 mask |= CGROUP_CPUACCT | CGROUP_CPU;
ecedd90f 331
4ad49000
LP
332 if (c->blockio_accounting ||
333 c->blockio_weight != 1000 ||
334 c->blockio_device_weights ||
335 c->blockio_device_bandwidths)
336 mask |= CGROUP_BLKIO;
ecedd90f 337
4ad49000
LP
338 if (c->memory_accounting ||
339 c->memory_limit != (uint64_t) -1 ||
340 c->memory_soft_limit != (uint64_t) -1)
341 mask |= CGROUP_MEMORY;
8e274523 342
4ad49000
LP
343 if (c->device_allow || c->device_policy != CGROUP_AUTO)
344 mask |= CGROUP_DEVICE;
345
346 return mask;
8e274523
LP
347}
348
4ad49000
LP
349static CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
350 CGroupContext *c;
8e274523 351
4ad49000
LP
352 c = unit_get_cgroup_context(u);
353 if (!c)
354 return 0;
8e274523 355
4ad49000 356 return cgroup_context_get_mask(c);
8e274523
LP
357}
358
4ad49000
LP
359static CGroupControllerMask unit_get_members_mask(Unit *u) {
360 CGroupControllerMask mask = 0;
361 Unit *m;
362 Iterator i;
246aa6dd 363
4ad49000 364 assert(u);
246aa6dd 365
4ad49000 366 SET_FOREACH(m, u->dependencies[UNIT_BEFORE], i) {
246aa6dd 367
4ad49000 368 if (UNIT_DEREF(m->slice) != u)
246aa6dd
LP
369 continue;
370
4ad49000 371 mask |= unit_get_cgroup_mask(m) | unit_get_members_mask(m);
246aa6dd
LP
372 }
373
4ad49000 374 return mask;
246aa6dd
LP
375}
376
4ad49000
LP
377static CGroupControllerMask unit_get_siblings_mask(Unit *u) {
378 assert(u);
246aa6dd 379
4ad49000
LP
380 if (!UNIT_ISSET(u->slice))
381 return 0;
382
383 /* Sibling propagation is only relevant for weight-based
384 * controllers, so let's mask out everything else */
385 return unit_get_members_mask(UNIT_DEREF(u->slice)) &
386 (CGROUP_CPU|CGROUP_BLKIO|CGROUP_CPUACCT);
246aa6dd
LP
387}
388
4ad49000
LP
389static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
390 char *path = NULL;
391 int r;
b58b8e11 392 bool is_in_hash = false;
64747e2d 393
4ad49000 394 assert(u);
64747e2d 395
4ad49000
LP
396 path = unit_default_cgroup_path(u);
397 if (!path)
398 return -ENOMEM;
64747e2d 399
0a1eb06d 400 r = hashmap_put(u->manager->cgroup_unit, path, u);
b58b8e11
HH
401 if (r == 0)
402 is_in_hash = true;
403
404 if (r < 0) {
b58b8e11 405 log_error("cgroup %s exists already: %s", path, strerror(-r));
81c68af0 406 free(path);
0a1eb06d 407 return r;
b58b8e11 408 }
0a1eb06d 409
4ad49000
LP
410 /* First, create our own group */
411 r = cg_create_with_mask(mask, path);
412 if (r < 0)
413 log_error("Failed to create cgroup %s: %s", path, strerror(-r));
64747e2d 414
4ad49000
LP
415 /* Then, possibly move things over */
416 if (u->cgroup_path && !streq(path, u->cgroup_path)) {
417 r = cg_migrate_with_mask(mask, u->cgroup_path, path);
64747e2d 418 if (r < 0)
4ad49000 419 log_error("Failed to migrate cgroup %s: %s", path, strerror(-r));
64747e2d
LP
420 }
421
b58b8e11
HH
422 if (!is_in_hash) {
423 /* And remember the new data */
424 free(u->cgroup_path);
425 u->cgroup_path = path;
426 }
427
4ad49000
LP
428 u->cgroup_realized = true;
429 u->cgroup_mask = mask;
430
64747e2d
LP
431 return 0;
432}
433
0a1eb06d 434static int unit_realize_cgroup_now(Unit *u) {
4ad49000 435 CGroupControllerMask mask;
64747e2d 436
4ad49000 437 assert(u);
64747e2d 438
4ad49000
LP
439 if (u->in_cgroup_queue) {
440 LIST_REMOVE(Unit, cgroup_queue, u->manager->cgroup_queue, u);
441 u->in_cgroup_queue = false;
442 }
64747e2d 443
4ad49000
LP
444 mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
445 mask &= u->manager->cgroup_supported;
64747e2d 446
4ad49000
LP
447 if (u->cgroup_realized &&
448 u->cgroup_mask == mask)
0a1eb06d 449 return 0;
64747e2d 450
4ad49000 451 /* First, realize parents */
3d040cf2
HH
452 if (UNIT_ISSET(u->slice))
453 unit_realize_cgroup_now(UNIT_DEREF(u->slice));
4ad49000
LP
454
455 /* And then do the real work */
0a1eb06d 456 return unit_create_cgroups(u, mask);
64747e2d
LP
457}
458
4ad49000 459static void unit_add_to_cgroup_queue(Unit *u) {
ecedd90f 460
4ad49000
LP
461 if (u->in_cgroup_queue)
462 return;
8e274523 463
4ad49000
LP
464 LIST_PREPEND(Unit, cgroup_queue, u->manager->cgroup_queue, u);
465 u->in_cgroup_queue = true;
466}
8c6db833 467
4ad49000
LP
468unsigned manager_dispatch_cgroup_queue(Manager *m) {
469 Unit *i;
470 unsigned n = 0;
ecedd90f 471
4ad49000
LP
472 while ((i = m->cgroup_queue)) {
473 assert(i->in_cgroup_queue);
ecedd90f 474
0a1eb06d
LP
475 if (unit_realize_cgroup_now(i) >= 0)
476 cgroup_context_apply(unit_get_cgroup_context(i), i->cgroup_mask, i->cgroup_path);
477
4ad49000
LP
478 n++;
479 }
ecedd90f 480
4ad49000 481 return n;
8e274523
LP
482}
483
4ad49000
LP
484static void unit_queue_siblings(Unit *u) {
485 Unit *slice;
ca949c9d 486
4ad49000
LP
487 /* This adds the siblings of the specified unit and the
488 * siblings of all parent units to the cgroup queue. (But
489 * neither the specified unit itself nor the parents.) */
490
491 while ((slice = UNIT_DEREF(u->slice))) {
492 Iterator i;
493 Unit *m;
8f53a7b8 494
4ad49000
LP
495 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
496 if (m == u)
497 continue;
8e274523 498
4ad49000 499 if (UNIT_DEREF(m->slice) != slice)
50159e6a 500 continue;
8e274523 501
4ad49000 502 unit_add_to_cgroup_queue(m);
50159e6a
LP
503 }
504
4ad49000 505 u = slice;
8e274523 506 }
4ad49000
LP
507}
508
0a1eb06d 509int unit_realize_cgroup(Unit *u) {
4ad49000 510 CGroupContext *c;
0a1eb06d 511 int r;
4ad49000
LP
512
513 assert(u);
514
515 c = unit_get_cgroup_context(u);
516 if (!c)
0a1eb06d 517 return 0;
8e274523 518
4ad49000
LP
519 /* So, here's the deal: when realizing the cgroups for this
520 * unit, we need to first create all parents, but there's more
521 * actually: for the weight-based controllers we also need to
522 * make sure that all our siblings (i.e. units that are in the
523 * same slice as we are) have cgroup too. Otherwise things
524 * would become very uneven as each of their processes would
525 * get as much resources as all our group together. This call
526 * will synchronously create the parent cgroups, but will
527 * defer work on the siblings to the next event loop
528 * iteration. */
ca949c9d 529
4ad49000
LP
530 /* Add all sibling slices to the cgroup queue. */
531 unit_queue_siblings(u);
532
533 /* And realize this one now */
0a1eb06d 534 r = unit_realize_cgroup_now(u);
4ad49000
LP
535
536 /* And apply the values */
0a1eb06d
LP
537 if (r >= 0)
538 cgroup_context_apply(c, u->cgroup_mask, u->cgroup_path);
539
540 return r;
8e274523
LP
541}
542
4ad49000 543void unit_destroy_cgroup(Unit *u) {
8e274523
LP
544 int r;
545
4ad49000 546 assert(u);
8e274523 547
4ad49000
LP
548 if (!u->cgroup_path)
549 return;
8e274523 550
8a841929 551 r = cg_trim_with_mask(u->cgroup_mask, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE));
4ad49000 552 if (r < 0)
376dd21d 553 log_debug("Failed to destroy cgroup %s: %s", u->cgroup_path, strerror(-r));
8e274523 554
0a1eb06d
LP
555 hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
556
4ad49000
LP
557 free(u->cgroup_path);
558 u->cgroup_path = NULL;
559 u->cgroup_realized = false;
560 u->cgroup_mask = 0;
0a1eb06d 561
8e274523
LP
562}
563
4ad49000
LP
564pid_t unit_search_main_pid(Unit *u) {
565 _cleanup_fclose_ FILE *f = NULL;
566 pid_t pid = 0, npid, mypid;
567
568 assert(u);
569
570 if (!u->cgroup_path)
571 return 0;
572
573 if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
574 return 0;
575
576 mypid = getpid();
577 while (cg_read_pid(f, &npid) > 0) {
578 pid_t ppid;
579
580 if (npid == pid)
581 continue;
8e274523 582
4ad49000
LP
583 /* Ignore processes that aren't our kids */
584 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
585 continue;
8e274523 586
4ad49000
LP
587 if (pid != 0) {
588 /* Dang, there's more than one daemonized PID
589 in this group, so we don't know what process
590 is the main process. */
591 pid = 0;
592 break;
593 }
8e274523 594
4ad49000 595 pid = npid;
8e274523
LP
596 }
597
4ad49000 598 return pid;
8e274523
LP
599}
600
8e274523 601int manager_setup_cgroup(Manager *m) {
9444b1f2 602 _cleanup_free_ char *path = NULL;
8e274523 603 int r;
9444b1f2 604 char *e, *a;
8e274523
LP
605
606 assert(m);
607
e5a53dc7 608 /* 0. Be nice to Ingo Molnar #628004 */
0c85a4f3 609 if (path_is_mount_point("/sys/fs/cgroup/systemd", false) <= 0) {
e5a53dc7
LP
610 log_warning("No control group support available, not creating root group.");
611 return 0;
612 }
613
35d2e7ec 614 /* 1. Determine hierarchy */
9444b1f2
LP
615 free(m->cgroup_root);
616 m->cgroup_root = NULL;
617
618 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
9156e799 619 if (r < 0) {
12235040 620 log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
a32360f1 621 return r;
12235040 622 }
8e274523 623
9444b1f2
LP
624 /* Already in /system.slice? If so, let's cut this off again */
625 if (m->running_as == SYSTEMD_SYSTEM) {
626 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
627 if (e)
628 *e = 0;
0baf24dd 629 }
7ccfb64a 630
9444b1f2
LP
631 /* And make sure to store away the root value without trailing
632 * slash, even for the root dir, so that we can easily prepend
633 * it everywhere. */
634 if (streq(m->cgroup_root, "/"))
635 m->cgroup_root[0] = 0;
8e274523 636
35d2e7ec 637 /* 2. Show data */
9444b1f2 638 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
3474ae3c 639 if (r < 0) {
12235040 640 log_error("Cannot find cgroup mount point: %s", strerror(-r));
a32360f1 641 return r;
12235040 642 }
8e274523 643
c6c18be3
LP
644 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
645
35d2e7ec 646 /* 3. Install agent */
a32360f1
LP
647 if (m->running_as == SYSTEMD_SYSTEM) {
648 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
649 if (r < 0)
650 log_warning("Failed to install release agent, ignoring: %s", strerror(-r));
651 else if (r > 0)
652 log_debug("Installed release agent.");
653 else
654 log_debug("Release agent already installed.");
655 }
8e274523 656
9444b1f2 657 /* 4. Realize the system slice and put us in there */
be2c1bd2
LP
658 if (m->running_as == SYSTEMD_SYSTEM) {
659 a = strappenda(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
660 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, a, 0);
661 } else
662 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
9156e799 663 if (r < 0) {
8e274523 664 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r));
a32360f1 665 return r;
c6c18be3
LP
666 }
667
35d2e7ec 668 /* 5. And pin it, so that it cannot be unmounted */
c6c18be3
LP
669 if (m->pin_cgroupfs_fd >= 0)
670 close_nointr_nofail(m->pin_cgroupfs_fd);
671
9156e799
LP
672 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
673 if (r < 0) {
12235040 674 log_error("Failed to open pin file: %m");
a32360f1 675 return -errno;
c6c18be3
LP
676 }
677
4ad49000
LP
678 /* 6. Figure out which controllers are supported */
679 m->cgroup_supported = cg_mask_supported();
9156e799 680
a32360f1 681 return 0;
8e274523
LP
682}
683
c6c18be3 684void manager_shutdown_cgroup(Manager *m, bool delete) {
8e274523
LP
685 assert(m);
686
9444b1f2
LP
687 /* We can't really delete the group, since we are in it. But
688 * let's trim it. */
689 if (delete && m->cgroup_root)
690 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
8e274523 691
c6c18be3
LP
692 if (m->pin_cgroupfs_fd >= 0) {
693 close_nointr_nofail(m->pin_cgroupfs_fd);
694 m->pin_cgroupfs_fd = -1;
695 }
696
9444b1f2
LP
697 free(m->cgroup_root);
698 m->cgroup_root = NULL;
8e274523
LP
699}
700
4ad49000 701Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
acb14d31 702 char *p;
4ad49000 703 Unit *u;
acb14d31
LP
704
705 assert(m);
706 assert(cgroup);
acb14d31 707
4ad49000
LP
708 u = hashmap_get(m->cgroup_unit, cgroup);
709 if (u)
710 return u;
acb14d31 711
8e70580b 712 p = strdupa(cgroup);
acb14d31
LP
713 for (;;) {
714 char *e;
715
716 e = strrchr(p, '/');
4ad49000
LP
717 if (e == p || !e)
718 return NULL;
acb14d31
LP
719
720 *e = 0;
721
4ad49000
LP
722 u = hashmap_get(m->cgroup_unit, p);
723 if (u)
724 return u;
acb14d31
LP
725 }
726}
727
4ad49000
LP
728Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
729 _cleanup_free_ char *cgroup = NULL;
acb14d31 730 int r;
8e274523 731
8c47c732
LP
732 assert(m);
733
734 if (pid <= 1)
735 return NULL;
736
4ad49000
LP
737 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
738 if (r < 0)
6dde1f33
LP
739 return NULL;
740
4ad49000 741 return manager_get_unit_by_cgroup(m, cgroup);
6dde1f33 742}
4fbf50b3 743
4ad49000
LP
744int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
745 Unit *u;
746 int r;
4fbf50b3 747
4ad49000
LP
748 assert(m);
749 assert(cgroup);
4fbf50b3 750
4ad49000 751 u = manager_get_unit_by_cgroup(m, cgroup);
b56c28c3 752 if (u) {
06025d91
LP
753 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
754 if (r > 0) {
755 if (UNIT_VTABLE(u)->notify_cgroup_empty)
756 UNIT_VTABLE(u)->notify_cgroup_empty(u);
b56c28c3 757
06025d91
LP
758 unit_add_to_gc_queue(u);
759 }
b56c28c3 760 }
2633eb83 761
4ad49000 762 return 0;
4fbf50b3
LP
763}
764
4ad49000
LP
765static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
766 [CGROUP_AUTO] = "auto",
767 [CGROUP_CLOSED] = "closed",
768 [CGROUP_STRICT] = "strict",
769};
4fbf50b3 770
4ad49000 771DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);