]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/cgroup.c
core: introduce "poweroff" as new failure action types
[thirdparty/systemd.git] / src / core / cgroup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
4ad49000 6 Copyright 2013 Lennart Poettering
8e274523
LP
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
c6c18be3 22#include <fcntl.h>
e41969e3 23#include <fnmatch.h>
8c6db833 24
9eb977db 25#include "path-util.h"
9444b1f2 26#include "special.h"
4ad49000
LP
27#include "cgroup-util.h"
28#include "cgroup.h"
8e274523 29
9a054909
LP
30#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
31
4ad49000
LP
32void cgroup_context_init(CGroupContext *c) {
33 assert(c);
34
35 /* Initialize everything to the kernel defaults, assuming the
36 * structure is preinitialized to 0 */
37
db785129
LP
38 c->cpu_shares = (unsigned long) -1;
39 c->startup_cpu_shares = (unsigned long) -1;
ddca82ac 40 c->memory_limit = (uint64_t) -1;
db785129
LP
41 c->blockio_weight = (unsigned long) -1;
42 c->startup_blockio_weight = (unsigned long) -1;
b2f8b02e 43
3a43da28 44 c->cpu_quota_per_sec_usec = USEC_INFINITY;
4ad49000 45}
8e274523 46
4ad49000
LP
47void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
48 assert(c);
49 assert(a);
50
71fda00f 51 LIST_REMOVE(device_allow, c->device_allow, a);
4ad49000
LP
52 free(a->path);
53 free(a);
54}
55
56void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
57 assert(c);
58 assert(w);
59
71fda00f 60 LIST_REMOVE(device_weights, c->blockio_device_weights, w);
4ad49000
LP
61 free(w->path);
62 free(w);
63}
64
65void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
66 assert(c);
8e274523 67 assert(b);
8e274523 68
71fda00f 69 LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
4ad49000
LP
70 free(b->path);
71 free(b);
72}
73
74void cgroup_context_done(CGroupContext *c) {
75 assert(c);
76
77 while (c->blockio_device_weights)
78 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
79
80 while (c->blockio_device_bandwidths)
81 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
82
83 while (c->device_allow)
84 cgroup_context_free_device_allow(c, c->device_allow);
85}
86
87void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
88 CGroupBlockIODeviceBandwidth *b;
89 CGroupBlockIODeviceWeight *w;
90 CGroupDeviceAllow *a;
9a054909 91 char u[FORMAT_TIMESPAN_MAX];
4ad49000
LP
92
93 assert(c);
94 assert(f);
95
96 prefix = strempty(prefix);
97
98 fprintf(f,
99 "%sCPUAccounting=%s\n"
100 "%sBlockIOAccounting=%s\n"
101 "%sMemoryAccounting=%s\n"
102 "%sCPUShares=%lu\n"
95ae05c0 103 "%sStartupCPUShares=%lu\n"
b2f8b02e 104 "%sCPUQuotaPerSecSec=%s\n"
112a7f46 105 "%sBlockIOWeight=%lu\n"
95ae05c0 106 "%sStartupBlockIOWeight=%lu\n"
4ad49000 107 "%sMemoryLimit=%" PRIu64 "\n"
4ad49000
LP
108 "%sDevicePolicy=%s\n",
109 prefix, yes_no(c->cpu_accounting),
110 prefix, yes_no(c->blockio_accounting),
111 prefix, yes_no(c->memory_accounting),
112 prefix, c->cpu_shares,
95ae05c0 113 prefix, c->startup_cpu_shares,
9a054909 114 prefix, strna(format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1)),
4ad49000 115 prefix, c->blockio_weight,
95ae05c0 116 prefix, c->startup_blockio_weight,
4ad49000 117 prefix, c->memory_limit,
4ad49000
LP
118 prefix, cgroup_device_policy_to_string(c->device_policy));
119
120 LIST_FOREACH(device_allow, a, c->device_allow)
121 fprintf(f,
122 "%sDeviceAllow=%s %s%s%s\n",
123 prefix,
124 a->path,
125 a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
126
127 LIST_FOREACH(device_weights, w, c->blockio_device_weights)
128 fprintf(f,
8e7076ca 129 "%sBlockIODeviceWeight=%s %lu",
4ad49000
LP
130 prefix,
131 w->path,
132 w->weight);
133
134 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
135 char buf[FORMAT_BYTES_MAX];
136
137 fprintf(f,
138 "%s%s=%s %s\n",
139 prefix,
140 b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
141 b->path,
142 format_bytes(buf, sizeof(buf), b->bandwidth));
143 }
144}
145
/* Resolve the path p to the block device number to use for blkio
 * settings and store it in *dev.
 *
 * If p is a block device node its own device number is used. Otherwise
 * the device number of the file system holding p is used, provided it
 * has a non-zero major; block_get_whole_disk() is then given the
 * chance to replace it (its result is not checked).
 *
 * Returns 0 on success, -errno if stat() fails, -ENODEV if no backing
 * block device can be determined. */
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;

        assert(p);
        assert(dev);

        if (stat(p, &st) < 0) {
                log_warning("Couldn't stat device %s: %m", p);
                return -errno;
        }

        /* A block device node names the device directly */
        if (S_ISBLK(st.st_mode)) {
                *dev = st.st_rdev;
                return 0;
        }

        if (major(st.st_dev) == 0) {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        /* Not a device node: use the block device this file is stored
         * on, and if that is a partition try to get the originating
         * block device. */
        *dev = st.st_dev;
        block_get_whole_disk(*dev, dev);

        return 0;
}
176
4ad49000
LP
177static int whitelist_device(const char *path, const char *node, const char *acc) {
178 char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
179 struct stat st;
8c6db833 180 int r;
8e274523 181
4ad49000
LP
182 assert(path);
183 assert(acc);
8e274523 184
4ad49000
LP
185 if (stat(node, &st) < 0) {
186 log_warning("Couldn't stat device %s", node);
187 return -errno;
188 }
189
190 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
191 log_warning("%s is not a device.", node);
192 return -ENODEV;
193 }
194
195 sprintf(buf,
196 "%c %u:%u %s",
197 S_ISCHR(st.st_mode) ? 'c' : 'b',
198 major(st.st_rdev), minor(st.st_rdev),
199 acc);
200
201 r = cg_set_attribute("devices", path, "devices.allow", buf);
1aeab12b
LP
202 if (r < 0)
203 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set devices.allow on %s: %s", path, strerror(-r));
4ad49000
LP
204
205 return r;
8e274523
LP
206}
207
90060676
LP
208static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
209 _cleanup_fclose_ FILE *f = NULL;
210 char line[LINE_MAX];
211 bool good = false;
212 int r;
213
214 assert(path);
215 assert(acc);
216 assert(type == 'b' || type == 'c');
217
218 f = fopen("/proc/devices", "re");
219 if (!f) {
220 log_warning("Cannot open /proc/devices to resolve %s (%c): %m", name, type);
221 return -errno;
222 }
223
224 FOREACH_LINE(line, f, goto fail) {
225 char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
226 unsigned maj;
227
228 truncate_nl(line);
229
230 if (type == 'c' && streq(line, "Character devices:")) {
231 good = true;
232 continue;
233 }
234
235 if (type == 'b' && streq(line, "Block devices:")) {
236 good = true;
237 continue;
238 }
239
240 if (isempty(line)) {
241 good = false;
242 continue;
243 }
244
245 if (!good)
246 continue;
247
248 p = strstrip(line);
249
250 w = strpbrk(p, WHITESPACE);
251 if (!w)
252 continue;
253 *w = 0;
254
255 r = safe_atou(p, &maj);
256 if (r < 0)
257 continue;
258 if (maj <= 0)
259 continue;
260
261 w++;
262 w += strspn(w, WHITESPACE);
e41969e3
LP
263
264 if (fnmatch(name, w, 0) != 0)
90060676
LP
265 continue;
266
267 sprintf(buf,
268 "%c %u:* %s",
269 type,
270 maj,
271 acc);
272
273 r = cg_set_attribute("devices", path, "devices.allow", buf);
1aeab12b
LP
274 if (r < 0)
275 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set devices.allow on %s: %s", path, strerror(-r));
90060676
LP
276 }
277
278 return 0;
279
280fail:
281 log_warning("Failed to read /proc/devices: %m");
282 return -errno;
283}
284
db785129 285void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path, ManagerState state) {
01efdf13 286 bool is_root;
4ad49000
LP
287 int r;
288
289 assert(c);
290 assert(path);
8e274523 291
4ad49000
LP
292 if (mask == 0)
293 return;
8e274523 294
01efdf13
LP
295 /* Some cgroup attributes are not support on the root cgroup,
296 * hence silently ignore */
297 is_root = isempty(path) || path_equal(path, "/");
298
299 if ((mask & CGROUP_CPU) && !is_root) {
b2f8b02e 300 char buf[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t)) + 1];
8e274523 301
db785129
LP
302 sprintf(buf, "%lu\n",
303 state == MANAGER_STARTING && c->startup_cpu_shares != (unsigned long) -1 ? c->startup_cpu_shares :
304 c->cpu_shares != (unsigned long) -1 ? c->cpu_shares : 1024);
4ad49000 305 r = cg_set_attribute("cpu", path, "cpu.shares", buf);
1aeab12b
LP
306 if (r < 0)
307 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set cpu.shares on %s: %s", path, strerror(-r));
b2f8b02e 308
9a054909 309 sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
b2f8b02e 310 r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
1aeab12b
LP
311 if (r < 0)
312 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set cpu.cfs_period_us on %s: %s", path, strerror(-r));
b2f8b02e 313
3a43da28 314 if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
9a054909 315 sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
b2f8b02e
LP
316 r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
317 } else
318 r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
1aeab12b
LP
319 if (r < 0)
320 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set cpu.cfs_quota_us on %s: %s", path, strerror(-r));
4ad49000
LP
321 }
322
323 if (mask & CGROUP_BLKIO) {
324 char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
325 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
326 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
327 CGroupBlockIODeviceWeight *w;
328 CGroupBlockIODeviceBandwidth *b;
329
01efdf13 330 if (!is_root) {
db785129
LP
331 sprintf(buf, "%lu\n", state == MANAGER_STARTING && c->startup_blockio_weight != (unsigned long) -1 ? c->startup_blockio_weight :
332 c->blockio_weight != (unsigned long) -1 ? c->blockio_weight : 1000);
01efdf13 333 r = cg_set_attribute("blkio", path, "blkio.weight", buf);
1aeab12b
LP
334 if (r < 0)
335 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set blkio.weight on %s: %s", path, strerror(-r));
4ad49000 336
01efdf13
LP
337 /* FIXME: no way to reset this list */
338 LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
339 dev_t dev;
4ad49000 340
01efdf13
LP
341 r = lookup_blkio_device(w->path, &dev);
342 if (r < 0)
343 continue;
8e274523 344
01efdf13
LP
345 sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
346 r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
1aeab12b
LP
347 if (r < 0)
348 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
01efdf13 349 }
4ad49000
LP
350 }
351
352 /* FIXME: no way to reset this list */
353 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
354 const char *a;
355 dev_t dev;
356
357 r = lookup_blkio_device(b->path, &dev);
358 if (r < 0)
359 continue;
360
361 a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
362
363 sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
364 r = cg_set_attribute("blkio", path, a, buf);
1aeab12b
LP
365 if (r < 0)
366 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set %s on %s: %s", a, path, strerror(-r));
d686d8a9 367 }
8e274523
LP
368 }
369
4ad49000 370 if (mask & CGROUP_MEMORY) {
6a94f2e9 371 if (c->memory_limit != (uint64_t) -1) {
e58cec11
LP
372 char buf[DECIMAL_STR_MAX(uint64_t) + 1];
373
6a94f2e9
G
374 sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
375 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
376 } else
377 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
8e274523 378
1aeab12b
LP
379 if (r < 0)
380 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
4ad49000 381 }
8e274523 382
01efdf13 383 if ((mask & CGROUP_DEVICE) && !is_root) {
4ad49000 384 CGroupDeviceAllow *a;
8e274523 385
4ad49000
LP
386 if (c->device_allow || c->device_policy != CGROUP_AUTO)
387 r = cg_set_attribute("devices", path, "devices.deny", "a");
388 else
389 r = cg_set_attribute("devices", path, "devices.allow", "a");
1aeab12b
LP
390 if (r < 0)
391 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to reset devices.list on %s: %s", path, strerror(-r));
fb385181 392
4ad49000
LP
393 if (c->device_policy == CGROUP_CLOSED ||
394 (c->device_policy == CGROUP_AUTO && c->device_allow)) {
395 static const char auto_devices[] =
7d711efb
LP
396 "/dev/null\0" "rwm\0"
397 "/dev/zero\0" "rwm\0"
398 "/dev/full\0" "rwm\0"
399 "/dev/random\0" "rwm\0"
400 "/dev/urandom\0" "rwm\0"
401 "/dev/tty\0" "rwm\0"
402 "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
4ad49000
LP
403
404 const char *x, *y;
405
406 NULSTR_FOREACH_PAIR(x, y, auto_devices)
407 whitelist_device(path, x, y);
7d711efb
LP
408
409 whitelist_major(path, "pts", 'c', "rw");
410 whitelist_major(path, "kdbus", 'c', "rw");
411 whitelist_major(path, "kdbus/*", 'c', "rw");
4ad49000
LP
412 }
413
414 LIST_FOREACH(device_allow, a, c->device_allow) {
415 char acc[4];
416 unsigned k = 0;
417
418 if (a->r)
419 acc[k++] = 'r';
420 if (a->w)
421 acc[k++] = 'w';
422 if (a->m)
423 acc[k++] = 'm';
fb385181 424
4ad49000
LP
425 if (k == 0)
426 continue;
fb385181 427
4ad49000 428 acc[k++] = 0;
90060676
LP
429
430 if (startswith(a->path, "/dev/"))
431 whitelist_device(path, a->path, acc);
432 else if (startswith(a->path, "block-"))
433 whitelist_major(path, a->path + 6, 'b', acc);
434 else if (startswith(a->path, "char-"))
435 whitelist_major(path, a->path + 5, 'c', acc);
436 else
437 log_debug("Ignoring device %s while writing cgroup attribute.", a->path);
4ad49000
LP
438 }
439 }
fb385181
LP
440}
441
db785129 442CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
4ad49000 443 CGroupControllerMask mask = 0;
8e274523 444
4ad49000 445 /* Figure out which controllers we need */
8e274523 446
b2f8b02e 447 if (c->cpu_accounting ||
db785129
LP
448 c->cpu_shares != (unsigned long) -1 ||
449 c->startup_cpu_shares != (unsigned long) -1 ||
3a43da28 450 c->cpu_quota_per_sec_usec != USEC_INFINITY)
4ad49000 451 mask |= CGROUP_CPUACCT | CGROUP_CPU;
ecedd90f 452
4ad49000 453 if (c->blockio_accounting ||
db785129
LP
454 c->blockio_weight != (unsigned long) -1 ||
455 c->startup_blockio_weight != (unsigned long) -1 ||
4ad49000 456 c->blockio_device_weights ||
db785129 457 c->blockio_device_bandwidths)
4ad49000 458 mask |= CGROUP_BLKIO;
ecedd90f 459
4ad49000 460 if (c->memory_accounting ||
ddca82ac 461 c->memory_limit != (uint64_t) -1)
4ad49000 462 mask |= CGROUP_MEMORY;
8e274523 463
4ad49000
LP
464 if (c->device_allow || c->device_policy != CGROUP_AUTO)
465 mask |= CGROUP_DEVICE;
466
467 return mask;
8e274523
LP
468}
469
bc432dc7 470CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
4ad49000 471 CGroupContext *c;
8e274523 472
4ad49000
LP
473 c = unit_get_cgroup_context(u);
474 if (!c)
475 return 0;
8e274523 476
db785129 477 return cgroup_context_get_mask(c);
8e274523
LP
478}
479
bc432dc7 480CGroupControllerMask unit_get_members_mask(Unit *u) {
4ad49000 481 assert(u);
bc432dc7
LP
482
483 if (u->cgroup_members_mask_valid)
484 return u->cgroup_members_mask;
485
486 u->cgroup_members_mask = 0;
487
488 if (u->type == UNIT_SLICE) {
489 Unit *member;
490 Iterator i;
491
492 SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {
493
494 if (member == u)
495 continue;
496
d4fdc205 497 if (UNIT_DEREF(member->slice) != u)
bc432dc7
LP
498 continue;
499
500 u->cgroup_members_mask |=
501 unit_get_cgroup_mask(member) |
502 unit_get_members_mask(member);
503 }
504 }
505
506 u->cgroup_members_mask_valid = true;
6414b7c9 507 return u->cgroup_members_mask;
246aa6dd
LP
508}
509
bc432dc7 510CGroupControllerMask unit_get_siblings_mask(Unit *u) {
4ad49000 511 assert(u);
246aa6dd 512
bc432dc7 513 if (UNIT_ISSET(u->slice))
637f421e 514 return unit_get_members_mask(UNIT_DEREF(u->slice));
4ad49000 515
637f421e 516 return unit_get_cgroup_mask(u) | unit_get_members_mask(u);
246aa6dd
LP
517}
518
bc432dc7 519CGroupControllerMask unit_get_target_mask(Unit *u) {
6414b7c9
DS
520 CGroupControllerMask mask;
521
522 mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
523 mask &= u->manager->cgroup_supported;
524
525 return mask;
526}
527
528/* Recurse from a unit up through its containing slices, propagating
529 * mask bits upward. A unit is also member of itself. */
bc432dc7
LP
530void unit_update_cgroup_members_masks(Unit *u) {
531 CGroupControllerMask m;
532 bool more;
533
534 assert(u);
535
536 /* Calculate subtree mask */
537 m = unit_get_cgroup_mask(u) | unit_get_members_mask(u);
538
539 /* See if anything changed from the previous invocation. If
540 * not, we're done. */
541 if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask)
542 return;
543
544 more =
545 u->cgroup_subtree_mask_valid &&
546 ((m & ~u->cgroup_subtree_mask) != 0) &&
547 ((~m & u->cgroup_subtree_mask) == 0);
548
549 u->cgroup_subtree_mask = m;
550 u->cgroup_subtree_mask_valid = true;
551
6414b7c9
DS
552 if (UNIT_ISSET(u->slice)) {
553 Unit *s = UNIT_DEREF(u->slice);
bc432dc7
LP
554
555 if (more)
556 /* There's more set now than before. We
557 * propagate the new mask to the parent's mask
558 * (not caring if it actually was valid or
559 * not). */
560
561 s->cgroup_members_mask |= m;
562
563 else
564 /* There's less set now than before (or we
565 * don't know), we need to recalculate
566 * everything, so let's invalidate the
567 * parent's members mask */
568
569 s->cgroup_members_mask_valid = false;
570
571 /* And now make sure that this change also hits our
572 * grandparents */
573 unit_update_cgroup_members_masks(s);
6414b7c9
DS
574 }
575}
576
03b90d4b
LP
577static const char *migrate_callback(CGroupControllerMask mask, void *userdata) {
578 Unit *u = userdata;
579
580 assert(mask != 0);
581 assert(u);
582
583 while (u) {
584 if (u->cgroup_path &&
585 u->cgroup_realized &&
586 (u->cgroup_realized_mask & mask) == mask)
587 return u->cgroup_path;
588
589 u = UNIT_DEREF(u->slice);
590 }
591
592 return NULL;
593}
594
4ad49000 595static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
03b90d4b 596 _cleanup_free_ char *path = NULL;
bc432dc7 597 int r;
64747e2d 598
4ad49000 599 assert(u);
64747e2d 600
4ad49000
LP
601 path = unit_default_cgroup_path(u);
602 if (!path)
a94042fa 603 return log_oom();
64747e2d 604
0a1eb06d 605 r = hashmap_put(u->manager->cgroup_unit, path, u);
03b90d4b
LP
606 if (r < 0) {
607 log_error(r == -EEXIST ? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path, strerror(-r));
0a1eb06d 608 return r;
b58b8e11 609 }
03b90d4b 610 if (r > 0) {
b58b8e11 611 u->cgroup_path = path;
a94042fa 612 path = NULL;
b58b8e11
HH
613 }
614
03b90d4b
LP
615 /* First, create our own group */
616 r = cg_create_everywhere(u->manager->cgroup_supported, mask, u->cgroup_path);
617 if (r < 0) {
618 log_error("Failed to create cgroup %s: %s", u->cgroup_path, strerror(-r));
619 return r;
620 }
621
622 /* Keep track that this is now realized */
4ad49000 623 u->cgroup_realized = true;
bc432dc7 624 u->cgroup_realized_mask = mask;
4ad49000 625
03b90d4b
LP
626 /* Then, possibly move things over */
627 r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);
628 if (r < 0)
629 log_warning("Failed to migrate cgroup from to %s: %s", u->cgroup_path, strerror(-r));
630
64747e2d
LP
631 return 0;
632}
633
6414b7c9 634static bool unit_has_mask_realized(Unit *u, CGroupControllerMask mask) {
bc432dc7
LP
635 assert(u);
636
637 return u->cgroup_realized && u->cgroup_realized_mask == mask;
6414b7c9
DS
638}
639
640/* Check if necessary controllers and attributes for a unit are in place.
641 *
642 * If so, do nothing.
643 * If not, create paths, move processes over, and set attributes.
644 *
645 * Returns 0 on success and < 0 on failure. */
db785129 646static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
4ad49000 647 CGroupControllerMask mask;
6414b7c9 648 int r;
64747e2d 649
4ad49000 650 assert(u);
64747e2d 651
4ad49000 652 if (u->in_cgroup_queue) {
71fda00f 653 LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
654 u->in_cgroup_queue = false;
655 }
64747e2d 656
6414b7c9 657 mask = unit_get_target_mask(u);
64747e2d 658
6414b7c9 659 if (unit_has_mask_realized(u, mask))
0a1eb06d 660 return 0;
64747e2d 661
4ad49000 662 /* First, realize parents */
6414b7c9 663 if (UNIT_ISSET(u->slice)) {
db785129 664 r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
6414b7c9
DS
665 if (r < 0)
666 return r;
667 }
4ad49000
LP
668
669 /* And then do the real work */
6414b7c9
DS
670 r = unit_create_cgroups(u, mask);
671 if (r < 0)
672 return r;
673
674 /* Finally, apply the necessary attributes. */
db785129 675 cgroup_context_apply(unit_get_cgroup_context(u), mask, u->cgroup_path, state);
6414b7c9
DS
676
677 return 0;
64747e2d
LP
678}
679
4ad49000 680static void unit_add_to_cgroup_queue(Unit *u) {
ecedd90f 681
4ad49000
LP
682 if (u->in_cgroup_queue)
683 return;
8e274523 684
71fda00f 685 LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
686 u->in_cgroup_queue = true;
687}
8c6db833 688
4ad49000 689unsigned manager_dispatch_cgroup_queue(Manager *m) {
db785129 690 ManagerState state;
4ad49000 691 unsigned n = 0;
db785129 692 Unit *i;
6414b7c9 693 int r;
ecedd90f 694
db785129
LP
695 state = manager_state(m);
696
4ad49000
LP
697 while ((i = m->cgroup_queue)) {
698 assert(i->in_cgroup_queue);
ecedd90f 699
db785129 700 r = unit_realize_cgroup_now(i, state);
6414b7c9
DS
701 if (r < 0)
702 log_warning("Failed to realize cgroups for queued unit %s: %s", i->id, strerror(-r));
0a1eb06d 703
4ad49000
LP
704 n++;
705 }
ecedd90f 706
4ad49000 707 return n;
8e274523
LP
708}
709
4ad49000
LP
710static void unit_queue_siblings(Unit *u) {
711 Unit *slice;
ca949c9d 712
4ad49000
LP
713 /* This adds the siblings of the specified unit and the
714 * siblings of all parent units to the cgroup queue. (But
715 * neither the specified unit itself nor the parents.) */
716
717 while ((slice = UNIT_DEREF(u->slice))) {
718 Iterator i;
719 Unit *m;
8f53a7b8 720
4ad49000
LP
721 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
722 if (m == u)
723 continue;
8e274523 724
6414b7c9
DS
725 /* Skip units that have a dependency on the slice
726 * but aren't actually in it. */
4ad49000 727 if (UNIT_DEREF(m->slice) != slice)
50159e6a 728 continue;
8e274523 729
6414b7c9
DS
730 /* No point in doing cgroup application for units
731 * without active processes. */
732 if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
733 continue;
734
735 /* If the unit doesn't need any new controllers
736 * and has current ones realized, it doesn't need
737 * any changes. */
738 if (unit_has_mask_realized(m, unit_get_target_mask(m)))
739 continue;
740
4ad49000 741 unit_add_to_cgroup_queue(m);
50159e6a
LP
742 }
743
4ad49000 744 u = slice;
8e274523 745 }
4ad49000
LP
746}
747
0a1eb06d 748int unit_realize_cgroup(Unit *u) {
4ad49000
LP
749 CGroupContext *c;
750
751 assert(u);
752
753 c = unit_get_cgroup_context(u);
754 if (!c)
0a1eb06d 755 return 0;
8e274523 756
4ad49000
LP
757 /* So, here's the deal: when realizing the cgroups for this
758 * unit, we need to first create all parents, but there's more
759 * actually: for the weight-based controllers we also need to
760 * make sure that all our siblings (i.e. units that are in the
73e231ab 761 * same slice as we are) have cgroups, too. Otherwise, things
4ad49000
LP
762 * would become very uneven as each of their processes would
763 * get as much resources as all our group together. This call
764 * will synchronously create the parent cgroups, but will
765 * defer work on the siblings to the next event loop
766 * iteration. */
ca949c9d 767
4ad49000
LP
768 /* Add all sibling slices to the cgroup queue. */
769 unit_queue_siblings(u);
770
6414b7c9 771 /* And realize this one now (and apply the values) */
db785129 772 return unit_realize_cgroup_now(u, manager_state(u->manager));
8e274523
LP
773}
774
4ad49000 775void unit_destroy_cgroup(Unit *u) {
8e274523
LP
776 int r;
777
4ad49000 778 assert(u);
8e274523 779
4ad49000
LP
780 if (!u->cgroup_path)
781 return;
8e274523 782
13b84ec7 783 r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE));
4ad49000 784 if (r < 0)
376dd21d 785 log_debug("Failed to destroy cgroup %s: %s", u->cgroup_path, strerror(-r));
8e274523 786
0a1eb06d
LP
787 hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
788
4ad49000
LP
789 free(u->cgroup_path);
790 u->cgroup_path = NULL;
791 u->cgroup_realized = false;
bc432dc7 792 u->cgroup_realized_mask = 0;
0a1eb06d 793
8e274523
LP
794}
795
4ad49000
LP
796pid_t unit_search_main_pid(Unit *u) {
797 _cleanup_fclose_ FILE *f = NULL;
798 pid_t pid = 0, npid, mypid;
799
800 assert(u);
801
802 if (!u->cgroup_path)
803 return 0;
804
805 if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
806 return 0;
807
808 mypid = getpid();
809 while (cg_read_pid(f, &npid) > 0) {
810 pid_t ppid;
811
812 if (npid == pid)
813 continue;
8e274523 814
4ad49000
LP
815 /* Ignore processes that aren't our kids */
816 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
817 continue;
8e274523 818
4ad49000
LP
819 if (pid != 0) {
820 /* Dang, there's more than one daemonized PID
821 in this group, so we don't know what process
822 is the main process. */
823 pid = 0;
824 break;
825 }
8e274523 826
4ad49000 827 pid = npid;
8e274523
LP
828 }
829
4ad49000 830 return pid;
8e274523
LP
831}
832
8e274523 833int manager_setup_cgroup(Manager *m) {
9444b1f2 834 _cleanup_free_ char *path = NULL;
8e274523 835 int r;
8e274523
LP
836
837 assert(m);
838
35d2e7ec 839 /* 1. Determine hierarchy */
9444b1f2
LP
840 free(m->cgroup_root);
841 m->cgroup_root = NULL;
842
843 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
9156e799 844 if (r < 0) {
12235040 845 log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
a32360f1 846 return r;
12235040 847 }
8e274523 848
15c60e99
LP
849 /* LEGACY: Already in /system.slice? If so, let's cut this
850 * off. This is to support live upgrades from older systemd
851 * versions where PID 1 was moved there. */
9444b1f2 852 if (m->running_as == SYSTEMD_SYSTEM) {
0d8c31ff
ZJS
853 char *e;
854
9444b1f2 855 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
15c60e99
LP
856 if (!e)
857 e = endswith(m->cgroup_root, "/system");
9444b1f2
LP
858 if (e)
859 *e = 0;
0baf24dd 860 }
7ccfb64a 861
9444b1f2
LP
862 /* And make sure to store away the root value without trailing
863 * slash, even for the root dir, so that we can easily prepend
864 * it everywhere. */
865 if (streq(m->cgroup_root, "/"))
866 m->cgroup_root[0] = 0;
8e274523 867
35d2e7ec 868 /* 2. Show data */
9444b1f2 869 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
3474ae3c 870 if (r < 0) {
12235040 871 log_error("Cannot find cgroup mount point: %s", strerror(-r));
a32360f1 872 return r;
12235040 873 }
8e274523 874
c6c18be3 875 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
0d8c31ff 876 if (!m->test_run) {
c6c18be3 877
0d8c31ff
ZJS
878 /* 3. Install agent */
879 if (m->running_as == SYSTEMD_SYSTEM) {
880 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
881 if (r < 0)
882 log_warning("Failed to install release agent, ignoring: %s", strerror(-r));
883 else if (r > 0)
884 log_debug("Installed release agent.");
885 else
886 log_debug("Release agent already installed.");
887 }
8e274523 888
0d8c31ff
ZJS
889 /* 4. Make sure we are in the root cgroup */
890 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
891 if (r < 0) {
892 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r));
893 return r;
894 }
c6c18be3 895
0d8c31ff
ZJS
896 /* 5. And pin it, so that it cannot be unmounted */
897 safe_close(m->pin_cgroupfs_fd);
c6c18be3 898
0d8c31ff
ZJS
899 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
900 if (m->pin_cgroupfs_fd < 0) {
901 log_error("Failed to open pin file: %m");
902 return -errno;
903 }
904
905 /* 6. Always enable hierarchial support if it exists... */
906 cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
c6c18be3
LP
907 }
908
0d8c31ff 909 /* 7. Figure out which controllers are supported */
4ad49000 910 m->cgroup_supported = cg_mask_supported();
9156e799 911
a32360f1 912 return 0;
8e274523
LP
913}
914
c6c18be3 915void manager_shutdown_cgroup(Manager *m, bool delete) {
8e274523
LP
916 assert(m);
917
9444b1f2
LP
918 /* We can't really delete the group, since we are in it. But
919 * let's trim it. */
920 if (delete && m->cgroup_root)
921 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
8e274523 922
03e334a1 923 m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd);
c6c18be3 924
9444b1f2
LP
925 free(m->cgroup_root);
926 m->cgroup_root = NULL;
8e274523
LP
927}
928
4ad49000 929Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
acb14d31 930 char *p;
4ad49000 931 Unit *u;
acb14d31
LP
932
933 assert(m);
934 assert(cgroup);
acb14d31 935
4ad49000
LP
936 u = hashmap_get(m->cgroup_unit, cgroup);
937 if (u)
938 return u;
acb14d31 939
8e70580b 940 p = strdupa(cgroup);
acb14d31
LP
941 for (;;) {
942 char *e;
943
944 e = strrchr(p, '/');
4ad49000
LP
945 if (e == p || !e)
946 return NULL;
acb14d31
LP
947
948 *e = 0;
949
4ad49000
LP
950 u = hashmap_get(m->cgroup_unit, p);
951 if (u)
952 return u;
acb14d31
LP
953 }
954}
955
4ad49000
LP
956Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
957 _cleanup_free_ char *cgroup = NULL;
acb14d31 958 int r;
8e274523 959
8c47c732
LP
960 assert(m);
961
962 if (pid <= 1)
963 return NULL;
964
4ad49000
LP
965 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
966 if (r < 0)
6dde1f33
LP
967 return NULL;
968
4ad49000 969 return manager_get_unit_by_cgroup(m, cgroup);
6dde1f33 970}
4fbf50b3 971
4ad49000
LP
972int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
973 Unit *u;
974 int r;
4fbf50b3 975
4ad49000
LP
976 assert(m);
977 assert(cgroup);
4fbf50b3 978
4ad49000 979 u = manager_get_unit_by_cgroup(m, cgroup);
b56c28c3 980 if (u) {
06025d91
LP
981 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
982 if (r > 0) {
983 if (UNIT_VTABLE(u)->notify_cgroup_empty)
984 UNIT_VTABLE(u)->notify_cgroup_empty(u);
b56c28c3 985
06025d91
LP
986 unit_add_to_gc_queue(u);
987 }
b56c28c3 988 }
2633eb83 989
4ad49000 990 return 0;
4fbf50b3
LP
991}
992
4ad49000
LP
993static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
994 [CGROUP_AUTO] = "auto",
995 [CGROUP_CLOSED] = "closed",
996 [CGROUP_STRICT] = "strict",
997};
4fbf50b3 998
4ad49000 999DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);