]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/cgroup.c
Add quotes to warning message
[thirdparty/systemd.git] / src / core / cgroup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
4ad49000 6 Copyright 2013 Lennart Poettering
8e274523
LP
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
c6c18be3 22#include <fcntl.h>
e41969e3 23#include <fnmatch.h>
8c6db833 24
9eb977db 25#include "path-util.h"
9444b1f2 26#include "special.h"
4ad49000
LP
27#include "cgroup-util.h"
28#include "cgroup.h"
8e274523 29
9a054909
LP
/* Period written to cpu.cfs_period_us; CPU quotas are scaled against it. */
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
31
4ad49000
LP
32void cgroup_context_init(CGroupContext *c) {
33 assert(c);
34
35 /* Initialize everything to the kernel defaults, assuming the
36 * structure is preinitialized to 0 */
37
db785129
LP
38 c->cpu_shares = (unsigned long) -1;
39 c->startup_cpu_shares = (unsigned long) -1;
ddca82ac 40 c->memory_limit = (uint64_t) -1;
db785129
LP
41 c->blockio_weight = (unsigned long) -1;
42 c->startup_blockio_weight = (unsigned long) -1;
b2f8b02e
LP
43
44 c->cpu_quota_per_sec_usec = (usec_t) -1;
4ad49000 45}
8e274523 46
4ad49000
LP
47void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
48 assert(c);
49 assert(a);
50
71fda00f 51 LIST_REMOVE(device_allow, c->device_allow, a);
4ad49000
LP
52 free(a->path);
53 free(a);
54}
55
56void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
57 assert(c);
58 assert(w);
59
71fda00f 60 LIST_REMOVE(device_weights, c->blockio_device_weights, w);
4ad49000
LP
61 free(w->path);
62 free(w);
63}
64
65void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
66 assert(c);
8e274523 67 assert(b);
8e274523 68
71fda00f 69 LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
4ad49000
LP
70 free(b->path);
71 free(b);
72}
73
74void cgroup_context_done(CGroupContext *c) {
75 assert(c);
76
77 while (c->blockio_device_weights)
78 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
79
80 while (c->blockio_device_bandwidths)
81 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
82
83 while (c->device_allow)
84 cgroup_context_free_device_allow(c, c->device_allow);
85}
86
87void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
88 CGroupBlockIODeviceBandwidth *b;
89 CGroupBlockIODeviceWeight *w;
90 CGroupDeviceAllow *a;
9a054909 91 char u[FORMAT_TIMESPAN_MAX];
4ad49000
LP
92
93 assert(c);
94 assert(f);
95
96 prefix = strempty(prefix);
97
98 fprintf(f,
99 "%sCPUAccounting=%s\n"
100 "%sBlockIOAccounting=%s\n"
101 "%sMemoryAccounting=%s\n"
102 "%sCPUShares=%lu\n"
95ae05c0 103 "%sStartupCPUShares=%lu\n"
b2f8b02e 104 "%sCPUQuotaPerSecSec=%s\n"
112a7f46 105 "%sBlockIOWeight=%lu\n"
95ae05c0 106 "%sStartupBlockIOWeight=%lu\n"
4ad49000 107 "%sMemoryLimit=%" PRIu64 "\n"
4ad49000
LP
108 "%sDevicePolicy=%s\n",
109 prefix, yes_no(c->cpu_accounting),
110 prefix, yes_no(c->blockio_accounting),
111 prefix, yes_no(c->memory_accounting),
112 prefix, c->cpu_shares,
95ae05c0 113 prefix, c->startup_cpu_shares,
9a054909 114 prefix, strna(format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1)),
4ad49000 115 prefix, c->blockio_weight,
95ae05c0 116 prefix, c->startup_blockio_weight,
4ad49000 117 prefix, c->memory_limit,
4ad49000
LP
118 prefix, cgroup_device_policy_to_string(c->device_policy));
119
120 LIST_FOREACH(device_allow, a, c->device_allow)
121 fprintf(f,
122 "%sDeviceAllow=%s %s%s%s\n",
123 prefix,
124 a->path,
125 a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
126
127 LIST_FOREACH(device_weights, w, c->blockio_device_weights)
128 fprintf(f,
8e7076ca 129 "%sBlockIODeviceWeight=%s %lu",
4ad49000
LP
130 prefix,
131 w->path,
132 w->weight);
133
134 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
135 char buf[FORMAT_BYTES_MAX];
136
137 fprintf(f,
138 "%s%s=%s %s\n",
139 prefix,
140 b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
141 b->path,
142 format_bytes(buf, sizeof(buf), b->bandwidth));
143 }
144}
145
/* Resolves the path p to a device number and stores it in *dev.
 *
 * If p is a block device node, its own device number is used. Otherwise the
 * device the file is stored on is used, and block_get_whole_disk() is asked
 * to replace it by the originating whole disk.
 *
 * Returns 0 on success, -errno if p cannot be stat()ed, and -ENODEV if no
 * backing block device can be determined. */
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;

        assert(p);
        assert(dev);

        if (stat(p, &st) < 0) {
                log_warning("Couldn't stat device %s: %m", p);
                return -errno;
        }

        if (S_ISBLK(st.st_mode)) {
                *dev = st.st_rdev;
                return 0;
        }

        if (major(st.st_dev) == 0) {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        /* Not a device node: use the block device this file is stored on */
        *dev = st.st_dev;

        /* If that is a partition, try to get the originating block
         * device; the result of this call is deliberately not checked */
        block_get_whole_disk(*dev, dev);

        return 0;
}
176
4ad49000
LP
177static int whitelist_device(const char *path, const char *node, const char *acc) {
178 char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
179 struct stat st;
8c6db833 180 int r;
8e274523 181
4ad49000
LP
182 assert(path);
183 assert(acc);
8e274523 184
4ad49000
LP
185 if (stat(node, &st) < 0) {
186 log_warning("Couldn't stat device %s", node);
187 return -errno;
188 }
189
190 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
191 log_warning("%s is not a device.", node);
192 return -ENODEV;
193 }
194
195 sprintf(buf,
196 "%c %u:%u %s",
197 S_ISCHR(st.st_mode) ? 'c' : 'b',
198 major(st.st_rdev), minor(st.st_rdev),
199 acc);
200
201 r = cg_set_attribute("devices", path, "devices.allow", buf);
202 if (r < 0)
203 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
204
205 return r;
8e274523
LP
206}
207
90060676
LP
208static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
209 _cleanup_fclose_ FILE *f = NULL;
210 char line[LINE_MAX];
211 bool good = false;
212 int r;
213
214 assert(path);
215 assert(acc);
216 assert(type == 'b' || type == 'c');
217
218 f = fopen("/proc/devices", "re");
219 if (!f) {
220 log_warning("Cannot open /proc/devices to resolve %s (%c): %m", name, type);
221 return -errno;
222 }
223
224 FOREACH_LINE(line, f, goto fail) {
225 char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
226 unsigned maj;
227
228 truncate_nl(line);
229
230 if (type == 'c' && streq(line, "Character devices:")) {
231 good = true;
232 continue;
233 }
234
235 if (type == 'b' && streq(line, "Block devices:")) {
236 good = true;
237 continue;
238 }
239
240 if (isempty(line)) {
241 good = false;
242 continue;
243 }
244
245 if (!good)
246 continue;
247
248 p = strstrip(line);
249
250 w = strpbrk(p, WHITESPACE);
251 if (!w)
252 continue;
253 *w = 0;
254
255 r = safe_atou(p, &maj);
256 if (r < 0)
257 continue;
258 if (maj <= 0)
259 continue;
260
261 w++;
262 w += strspn(w, WHITESPACE);
e41969e3
LP
263
264 if (fnmatch(name, w, 0) != 0)
90060676
LP
265 continue;
266
267 sprintf(buf,
268 "%c %u:* %s",
269 type,
270 maj,
271 acc);
272
273 r = cg_set_attribute("devices", path, "devices.allow", buf);
274 if (r < 0)
275 log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
276 }
277
278 return 0;
279
280fail:
281 log_warning("Failed to read /proc/devices: %m");
282 return -errno;
283}
284
db785129 285void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path, ManagerState state) {
01efdf13 286 bool is_root;
4ad49000
LP
287 int r;
288
289 assert(c);
290 assert(path);
8e274523 291
4ad49000
LP
292 if (mask == 0)
293 return;
8e274523 294
01efdf13
LP
295 /* Some cgroup attributes are not support on the root cgroup,
296 * hence silently ignore */
297 is_root = isempty(path) || path_equal(path, "/");
298
299 if ((mask & CGROUP_CPU) && !is_root) {
b2f8b02e 300 char buf[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t)) + 1];
8e274523 301
db785129
LP
302 sprintf(buf, "%lu\n",
303 state == MANAGER_STARTING && c->startup_cpu_shares != (unsigned long) -1 ? c->startup_cpu_shares :
304 c->cpu_shares != (unsigned long) -1 ? c->cpu_shares : 1024);
4ad49000
LP
305 r = cg_set_attribute("cpu", path, "cpu.shares", buf);
306 if (r < 0)
307 log_warning("Failed to set cpu.shares on %s: %s", path, strerror(-r));
b2f8b02e 308
9a054909 309 sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
b2f8b02e
LP
310 r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
311 if (r < 0)
312 log_warning("Failed to set cpu.cfs_period_us on %s: %s", path, strerror(-r));
313
9a054909
LP
314 if (c->cpu_quota_per_sec_usec != (usec_t) -1) {
315 sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
b2f8b02e
LP
316 r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
317 } else
318 r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
319 if (r < 0)
320 log_warning("Failed to set cpu.cfs_quota_us on %s: %s", path, strerror(-r));
4ad49000
LP
321 }
322
323 if (mask & CGROUP_BLKIO) {
324 char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
325 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
326 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
327 CGroupBlockIODeviceWeight *w;
328 CGroupBlockIODeviceBandwidth *b;
329
01efdf13 330 if (!is_root) {
db785129
LP
331 sprintf(buf, "%lu\n", state == MANAGER_STARTING && c->startup_blockio_weight != (unsigned long) -1 ? c->startup_blockio_weight :
332 c->blockio_weight != (unsigned long) -1 ? c->blockio_weight : 1000);
01efdf13
LP
333 r = cg_set_attribute("blkio", path, "blkio.weight", buf);
334 if (r < 0)
335 log_warning("Failed to set blkio.weight on %s: %s", path, strerror(-r));
4ad49000 336
01efdf13
LP
337 /* FIXME: no way to reset this list */
338 LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
339 dev_t dev;
4ad49000 340
01efdf13
LP
341 r = lookup_blkio_device(w->path, &dev);
342 if (r < 0)
343 continue;
8e274523 344
01efdf13
LP
345 sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
346 r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
347 if (r < 0)
348 log_error("Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
349 }
4ad49000
LP
350 }
351
352 /* FIXME: no way to reset this list */
353 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
354 const char *a;
355 dev_t dev;
356
357 r = lookup_blkio_device(b->path, &dev);
358 if (r < 0)
359 continue;
360
361 a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
362
363 sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
364 r = cg_set_attribute("blkio", path, a, buf);
365 if (r < 0)
366 log_error("Failed to set %s on %s: %s", a, path, strerror(-r));
d686d8a9 367 }
8e274523
LP
368 }
369
4ad49000 370 if (mask & CGROUP_MEMORY) {
6a94f2e9 371 if (c->memory_limit != (uint64_t) -1) {
e58cec11
LP
372 char buf[DECIMAL_STR_MAX(uint64_t) + 1];
373
6a94f2e9
G
374 sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
375 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
376 } else
377 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
8e274523 378
4ad49000
LP
379 if (r < 0)
380 log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
4ad49000 381 }
8e274523 382
01efdf13 383 if ((mask & CGROUP_DEVICE) && !is_root) {
4ad49000 384 CGroupDeviceAllow *a;
8e274523 385
4ad49000
LP
386 if (c->device_allow || c->device_policy != CGROUP_AUTO)
387 r = cg_set_attribute("devices", path, "devices.deny", "a");
388 else
389 r = cg_set_attribute("devices", path, "devices.allow", "a");
390 if (r < 0)
01efdf13 391 log_warning("Failed to reset devices.list on %s: %s", path, strerror(-r));
fb385181 392
4ad49000
LP
393 if (c->device_policy == CGROUP_CLOSED ||
394 (c->device_policy == CGROUP_AUTO && c->device_allow)) {
395 static const char auto_devices[] =
7d711efb
LP
396 "/dev/null\0" "rwm\0"
397 "/dev/zero\0" "rwm\0"
398 "/dev/full\0" "rwm\0"
399 "/dev/random\0" "rwm\0"
400 "/dev/urandom\0" "rwm\0"
401 "/dev/tty\0" "rwm\0"
402 "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
4ad49000
LP
403
404 const char *x, *y;
405
406 NULSTR_FOREACH_PAIR(x, y, auto_devices)
407 whitelist_device(path, x, y);
7d711efb
LP
408
409 whitelist_major(path, "pts", 'c', "rw");
410 whitelist_major(path, "kdbus", 'c', "rw");
411 whitelist_major(path, "kdbus/*", 'c', "rw");
4ad49000
LP
412 }
413
414 LIST_FOREACH(device_allow, a, c->device_allow) {
415 char acc[4];
416 unsigned k = 0;
417
418 if (a->r)
419 acc[k++] = 'r';
420 if (a->w)
421 acc[k++] = 'w';
422 if (a->m)
423 acc[k++] = 'm';
fb385181 424
4ad49000
LP
425 if (k == 0)
426 continue;
fb385181 427
4ad49000 428 acc[k++] = 0;
90060676
LP
429
430 if (startswith(a->path, "/dev/"))
431 whitelist_device(path, a->path, acc);
432 else if (startswith(a->path, "block-"))
433 whitelist_major(path, a->path + 6, 'b', acc);
434 else if (startswith(a->path, "char-"))
435 whitelist_major(path, a->path + 5, 'c', acc);
436 else
437 log_debug("Ignoring device %s while writing cgroup attribute.", a->path);
4ad49000
LP
438 }
439 }
fb385181
LP
440}
441
db785129 442CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
4ad49000 443 CGroupControllerMask mask = 0;
8e274523 444
4ad49000 445 /* Figure out which controllers we need */
8e274523 446
b2f8b02e 447 if (c->cpu_accounting ||
db785129
LP
448 c->cpu_shares != (unsigned long) -1 ||
449 c->startup_cpu_shares != (unsigned long) -1 ||
db785129 450 c->cpu_quota_per_sec_usec != (usec_t) -1)
4ad49000 451 mask |= CGROUP_CPUACCT | CGROUP_CPU;
ecedd90f 452
4ad49000 453 if (c->blockio_accounting ||
db785129
LP
454 c->blockio_weight != (unsigned long) -1 ||
455 c->startup_blockio_weight != (unsigned long) -1 ||
4ad49000 456 c->blockio_device_weights ||
db785129 457 c->blockio_device_bandwidths)
4ad49000 458 mask |= CGROUP_BLKIO;
ecedd90f 459
4ad49000 460 if (c->memory_accounting ||
ddca82ac 461 c->memory_limit != (uint64_t) -1)
4ad49000 462 mask |= CGROUP_MEMORY;
8e274523 463
4ad49000
LP
464 if (c->device_allow || c->device_policy != CGROUP_AUTO)
465 mask |= CGROUP_DEVICE;
466
467 return mask;
8e274523
LP
468}
469
bc432dc7 470CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
4ad49000 471 CGroupContext *c;
8e274523 472
4ad49000
LP
473 c = unit_get_cgroup_context(u);
474 if (!c)
475 return 0;
8e274523 476
db785129 477 return cgroup_context_get_mask(c);
8e274523
LP
478}
479
bc432dc7 480CGroupControllerMask unit_get_members_mask(Unit *u) {
4ad49000 481 assert(u);
bc432dc7
LP
482
483 if (u->cgroup_members_mask_valid)
484 return u->cgroup_members_mask;
485
486 u->cgroup_members_mask = 0;
487
488 if (u->type == UNIT_SLICE) {
489 Unit *member;
490 Iterator i;
491
492 SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {
493
494 if (member == u)
495 continue;
496
d4fdc205 497 if (UNIT_DEREF(member->slice) != u)
bc432dc7
LP
498 continue;
499
500 u->cgroup_members_mask |=
501 unit_get_cgroup_mask(member) |
502 unit_get_members_mask(member);
503 }
504 }
505
506 u->cgroup_members_mask_valid = true;
6414b7c9 507 return u->cgroup_members_mask;
246aa6dd
LP
508}
509
bc432dc7 510CGroupControllerMask unit_get_siblings_mask(Unit *u) {
4ad49000 511 assert(u);
246aa6dd 512
bc432dc7 513 if (UNIT_ISSET(u->slice))
637f421e 514 return unit_get_members_mask(UNIT_DEREF(u->slice));
4ad49000 515
637f421e 516 return unit_get_cgroup_mask(u) | unit_get_members_mask(u);
246aa6dd
LP
517}
518
bc432dc7 519CGroupControllerMask unit_get_target_mask(Unit *u) {
6414b7c9
DS
520 CGroupControllerMask mask;
521
522 mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
523 mask &= u->manager->cgroup_supported;
524
525 return mask;
526}
527
528/* Recurse from a unit up through its containing slices, propagating
529 * mask bits upward. A unit is also member of itself. */
bc432dc7
LP
530void unit_update_cgroup_members_masks(Unit *u) {
531 CGroupControllerMask m;
532 bool more;
533
534 assert(u);
535
536 /* Calculate subtree mask */
537 m = unit_get_cgroup_mask(u) | unit_get_members_mask(u);
538
539 /* See if anything changed from the previous invocation. If
540 * not, we're done. */
541 if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask)
542 return;
543
544 more =
545 u->cgroup_subtree_mask_valid &&
546 ((m & ~u->cgroup_subtree_mask) != 0) &&
547 ((~m & u->cgroup_subtree_mask) == 0);
548
549 u->cgroup_subtree_mask = m;
550 u->cgroup_subtree_mask_valid = true;
551
6414b7c9
DS
552 if (UNIT_ISSET(u->slice)) {
553 Unit *s = UNIT_DEREF(u->slice);
bc432dc7
LP
554
555 if (more)
556 /* There's more set now than before. We
557 * propagate the new mask to the parent's mask
558 * (not caring if it actually was valid or
559 * not). */
560
561 s->cgroup_members_mask |= m;
562
563 else
564 /* There's less set now than before (or we
565 * don't know), we need to recalculate
566 * everything, so let's invalidate the
567 * parent's members mask */
568
569 s->cgroup_members_mask_valid = false;
570
571 /* And now make sure that this change also hits our
572 * grandparents */
573 unit_update_cgroup_members_masks(s);
6414b7c9
DS
574 }
575}
576
03b90d4b
LP
577static const char *migrate_callback(CGroupControllerMask mask, void *userdata) {
578 Unit *u = userdata;
579
580 assert(mask != 0);
581 assert(u);
582
583 while (u) {
584 if (u->cgroup_path &&
585 u->cgroup_realized &&
586 (u->cgroup_realized_mask & mask) == mask)
587 return u->cgroup_path;
588
589 u = UNIT_DEREF(u->slice);
590 }
591
592 return NULL;
593}
594
4ad49000 595static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
03b90d4b 596 _cleanup_free_ char *path = NULL;
bc432dc7 597 int r;
64747e2d 598
4ad49000 599 assert(u);
64747e2d 600
4ad49000
LP
601 path = unit_default_cgroup_path(u);
602 if (!path)
a94042fa 603 return log_oom();
64747e2d 604
0a1eb06d 605 r = hashmap_put(u->manager->cgroup_unit, path, u);
03b90d4b
LP
606 if (r < 0) {
607 log_error(r == -EEXIST ? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path, strerror(-r));
0a1eb06d 608 return r;
b58b8e11 609 }
03b90d4b 610 if (r > 0) {
b58b8e11 611 u->cgroup_path = path;
a94042fa 612 path = NULL;
b58b8e11
HH
613 }
614
03b90d4b
LP
615 /* First, create our own group */
616 r = cg_create_everywhere(u->manager->cgroup_supported, mask, u->cgroup_path);
617 if (r < 0) {
618 log_error("Failed to create cgroup %s: %s", u->cgroup_path, strerror(-r));
619 return r;
620 }
621
622 /* Keep track that this is now realized */
4ad49000 623 u->cgroup_realized = true;
bc432dc7 624 u->cgroup_realized_mask = mask;
4ad49000 625
03b90d4b
LP
626 /* Then, possibly move things over */
627 r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);
628 if (r < 0)
629 log_warning("Failed to migrate cgroup from to %s: %s", u->cgroup_path, strerror(-r));
630
64747e2d
LP
631 return 0;
632}
633
6414b7c9 634static bool unit_has_mask_realized(Unit *u, CGroupControllerMask mask) {
bc432dc7
LP
635 assert(u);
636
637 return u->cgroup_realized && u->cgroup_realized_mask == mask;
6414b7c9
DS
638}
639
640/* Check if necessary controllers and attributes for a unit are in place.
641 *
642 * If so, do nothing.
643 * If not, create paths, move processes over, and set attributes.
644 *
645 * Returns 0 on success and < 0 on failure. */
db785129 646static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
4ad49000 647 CGroupControllerMask mask;
6414b7c9 648 int r;
64747e2d 649
4ad49000 650 assert(u);
64747e2d 651
4ad49000 652 if (u->in_cgroup_queue) {
71fda00f 653 LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
654 u->in_cgroup_queue = false;
655 }
64747e2d 656
6414b7c9 657 mask = unit_get_target_mask(u);
64747e2d 658
6414b7c9 659 if (unit_has_mask_realized(u, mask))
0a1eb06d 660 return 0;
64747e2d 661
4ad49000 662 /* First, realize parents */
6414b7c9 663 if (UNIT_ISSET(u->slice)) {
db785129 664 r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
6414b7c9
DS
665 if (r < 0)
666 return r;
667 }
4ad49000
LP
668
669 /* And then do the real work */
6414b7c9
DS
670 r = unit_create_cgroups(u, mask);
671 if (r < 0)
672 return r;
673
674 /* Finally, apply the necessary attributes. */
db785129 675 cgroup_context_apply(unit_get_cgroup_context(u), mask, u->cgroup_path, state);
6414b7c9
DS
676
677 return 0;
64747e2d
LP
678}
679
4ad49000 680static void unit_add_to_cgroup_queue(Unit *u) {
ecedd90f 681
4ad49000
LP
682 if (u->in_cgroup_queue)
683 return;
8e274523 684
71fda00f 685 LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
686 u->in_cgroup_queue = true;
687}
8c6db833 688
4ad49000 689unsigned manager_dispatch_cgroup_queue(Manager *m) {
db785129 690 ManagerState state;
4ad49000 691 unsigned n = 0;
db785129 692 Unit *i;
6414b7c9 693 int r;
ecedd90f 694
db785129
LP
695 state = manager_state(m);
696
4ad49000
LP
697 while ((i = m->cgroup_queue)) {
698 assert(i->in_cgroup_queue);
ecedd90f 699
db785129 700 r = unit_realize_cgroup_now(i, state);
6414b7c9
DS
701 if (r < 0)
702 log_warning("Failed to realize cgroups for queued unit %s: %s", i->id, strerror(-r));
0a1eb06d 703
4ad49000
LP
704 n++;
705 }
ecedd90f 706
4ad49000 707 return n;
8e274523
LP
708}
709
4ad49000
LP
710static void unit_queue_siblings(Unit *u) {
711 Unit *slice;
ca949c9d 712
4ad49000
LP
713 /* This adds the siblings of the specified unit and the
714 * siblings of all parent units to the cgroup queue. (But
715 * neither the specified unit itself nor the parents.) */
716
717 while ((slice = UNIT_DEREF(u->slice))) {
718 Iterator i;
719 Unit *m;
8f53a7b8 720
4ad49000
LP
721 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
722 if (m == u)
723 continue;
8e274523 724
6414b7c9
DS
725 /* Skip units that have a dependency on the slice
726 * but aren't actually in it. */
4ad49000 727 if (UNIT_DEREF(m->slice) != slice)
50159e6a 728 continue;
8e274523 729
6414b7c9
DS
730 /* No point in doing cgroup application for units
731 * without active processes. */
732 if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
733 continue;
734
735 /* If the unit doesn't need any new controllers
736 * and has current ones realized, it doesn't need
737 * any changes. */
738 if (unit_has_mask_realized(m, unit_get_target_mask(m)))
739 continue;
740
4ad49000 741 unit_add_to_cgroup_queue(m);
50159e6a
LP
742 }
743
4ad49000 744 u = slice;
8e274523 745 }
4ad49000
LP
746}
747
0a1eb06d 748int unit_realize_cgroup(Unit *u) {
4ad49000
LP
749 CGroupContext *c;
750
751 assert(u);
752
753 c = unit_get_cgroup_context(u);
754 if (!c)
0a1eb06d 755 return 0;
8e274523 756
4ad49000
LP
757 /* So, here's the deal: when realizing the cgroups for this
758 * unit, we need to first create all parents, but there's more
759 * actually: for the weight-based controllers we also need to
760 * make sure that all our siblings (i.e. units that are in the
73e231ab 761 * same slice as we are) have cgroups, too. Otherwise, things
4ad49000
LP
762 * would become very uneven as each of their processes would
763 * get as much resources as all our group together. This call
764 * will synchronously create the parent cgroups, but will
765 * defer work on the siblings to the next event loop
766 * iteration. */
ca949c9d 767
4ad49000
LP
768 /* Add all sibling slices to the cgroup queue. */
769 unit_queue_siblings(u);
770
6414b7c9 771 /* And realize this one now (and apply the values) */
db785129 772 return unit_realize_cgroup_now(u, manager_state(u->manager));
8e274523
LP
773}
774
4ad49000 775void unit_destroy_cgroup(Unit *u) {
8e274523
LP
776 int r;
777
4ad49000 778 assert(u);
8e274523 779
4ad49000
LP
780 if (!u->cgroup_path)
781 return;
8e274523 782
13b84ec7 783 r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE));
4ad49000 784 if (r < 0)
376dd21d 785 log_debug("Failed to destroy cgroup %s: %s", u->cgroup_path, strerror(-r));
8e274523 786
0a1eb06d
LP
787 hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
788
4ad49000
LP
789 free(u->cgroup_path);
790 u->cgroup_path = NULL;
791 u->cgroup_realized = false;
bc432dc7 792 u->cgroup_realized_mask = 0;
0a1eb06d 793
8e274523
LP
794}
795
4ad49000
LP
796pid_t unit_search_main_pid(Unit *u) {
797 _cleanup_fclose_ FILE *f = NULL;
798 pid_t pid = 0, npid, mypid;
799
800 assert(u);
801
802 if (!u->cgroup_path)
803 return 0;
804
805 if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
806 return 0;
807
808 mypid = getpid();
809 while (cg_read_pid(f, &npid) > 0) {
810 pid_t ppid;
811
812 if (npid == pid)
813 continue;
8e274523 814
4ad49000
LP
815 /* Ignore processes that aren't our kids */
816 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
817 continue;
8e274523 818
4ad49000
LP
819 if (pid != 0) {
820 /* Dang, there's more than one daemonized PID
821 in this group, so we don't know what process
822 is the main process. */
823 pid = 0;
824 break;
825 }
8e274523 826
4ad49000 827 pid = npid;
8e274523
LP
828 }
829
4ad49000 830 return pid;
8e274523
LP
831}
832
8e274523 833int manager_setup_cgroup(Manager *m) {
9444b1f2 834 _cleanup_free_ char *path = NULL;
15c60e99 835 char *e;
8e274523 836 int r;
8e274523
LP
837
838 assert(m);
839
35d2e7ec 840 /* 1. Determine hierarchy */
9444b1f2
LP
841 free(m->cgroup_root);
842 m->cgroup_root = NULL;
843
844 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
9156e799 845 if (r < 0) {
12235040 846 log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
a32360f1 847 return r;
12235040 848 }
8e274523 849
15c60e99
LP
850 /* LEGACY: Already in /system.slice? If so, let's cut this
851 * off. This is to support live upgrades from older systemd
852 * versions where PID 1 was moved there. */
9444b1f2
LP
853 if (m->running_as == SYSTEMD_SYSTEM) {
854 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
15c60e99
LP
855 if (!e)
856 e = endswith(m->cgroup_root, "/system");
9444b1f2
LP
857 if (e)
858 *e = 0;
0baf24dd 859 }
7ccfb64a 860
9444b1f2
LP
861 /* And make sure to store away the root value without trailing
862 * slash, even for the root dir, so that we can easily prepend
863 * it everywhere. */
864 if (streq(m->cgroup_root, "/"))
865 m->cgroup_root[0] = 0;
8e274523 866
35d2e7ec 867 /* 2. Show data */
9444b1f2 868 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
3474ae3c 869 if (r < 0) {
12235040 870 log_error("Cannot find cgroup mount point: %s", strerror(-r));
a32360f1 871 return r;
12235040 872 }
8e274523 873
c6c18be3
LP
874 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
875
35d2e7ec 876 /* 3. Install agent */
a32360f1
LP
877 if (m->running_as == SYSTEMD_SYSTEM) {
878 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
879 if (r < 0)
880 log_warning("Failed to install release agent, ignoring: %s", strerror(-r));
881 else if (r > 0)
882 log_debug("Installed release agent.");
883 else
884 log_debug("Release agent already installed.");
885 }
8e274523 886
15c60e99
LP
887 /* 4. Make sure we are in the root cgroup */
888 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
9156e799 889 if (r < 0) {
8e274523 890 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r));
a32360f1 891 return r;
c6c18be3
LP
892 }
893
35d2e7ec 894 /* 5. And pin it, so that it cannot be unmounted */
03e334a1 895 safe_close(m->pin_cgroupfs_fd);
c6c18be3 896
9156e799 897 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
cd7affae 898 if (m->pin_cgroupfs_fd < 0) {
12235040 899 log_error("Failed to open pin file: %m");
a32360f1 900 return -errno;
c6c18be3
LP
901 }
902
4ad49000
LP
903 /* 6. Figure out which controllers are supported */
904 m->cgroup_supported = cg_mask_supported();
9156e799 905
e58cec11
LP
906 /* 7. Always enable hierarchial support if it exists... */
907 cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
908
a32360f1 909 return 0;
8e274523
LP
910}
911
c6c18be3 912void manager_shutdown_cgroup(Manager *m, bool delete) {
8e274523
LP
913 assert(m);
914
9444b1f2
LP
915 /* We can't really delete the group, since we are in it. But
916 * let's trim it. */
917 if (delete && m->cgroup_root)
918 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
8e274523 919
03e334a1 920 m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd);
c6c18be3 921
9444b1f2
LP
922 free(m->cgroup_root);
923 m->cgroup_root = NULL;
8e274523
LP
924}
925
4ad49000 926Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
acb14d31 927 char *p;
4ad49000 928 Unit *u;
acb14d31
LP
929
930 assert(m);
931 assert(cgroup);
acb14d31 932
4ad49000
LP
933 u = hashmap_get(m->cgroup_unit, cgroup);
934 if (u)
935 return u;
acb14d31 936
8e70580b 937 p = strdupa(cgroup);
acb14d31
LP
938 for (;;) {
939 char *e;
940
941 e = strrchr(p, '/');
4ad49000
LP
942 if (e == p || !e)
943 return NULL;
acb14d31
LP
944
945 *e = 0;
946
4ad49000
LP
947 u = hashmap_get(m->cgroup_unit, p);
948 if (u)
949 return u;
acb14d31
LP
950 }
951}
952
4ad49000
LP
953Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
954 _cleanup_free_ char *cgroup = NULL;
acb14d31 955 int r;
8e274523 956
8c47c732
LP
957 assert(m);
958
959 if (pid <= 1)
960 return NULL;
961
4ad49000
LP
962 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
963 if (r < 0)
6dde1f33
LP
964 return NULL;
965
4ad49000 966 return manager_get_unit_by_cgroup(m, cgroup);
6dde1f33 967}
4fbf50b3 968
4ad49000
LP
969int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
970 Unit *u;
971 int r;
4fbf50b3 972
4ad49000
LP
973 assert(m);
974 assert(cgroup);
4fbf50b3 975
4ad49000 976 u = manager_get_unit_by_cgroup(m, cgroup);
b56c28c3 977 if (u) {
06025d91
LP
978 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
979 if (r > 0) {
980 if (UNIT_VTABLE(u)->notify_cgroup_empty)
981 UNIT_VTABLE(u)->notify_cgroup_empty(u);
b56c28c3 982
06025d91
LP
983 unit_add_to_gc_queue(u);
984 }
b56c28c3 985 }
2633eb83 986
4ad49000 987 return 0;
4fbf50b3
LP
988}
989
4ad49000
LP
990static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
991 [CGROUP_AUTO] = "auto",
992 [CGROUP_CLOSED] = "closed",
993 [CGROUP_STRICT] = "strict",
994};
4fbf50b3 995
4ad49000 996DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);