]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/cgroup.c
main,log: parse the log related kernel command line parameters at one place only...
[thirdparty/systemd.git] / src / core / cgroup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
4ad49000 6 Copyright 2013 Lennart Poettering
8e274523
LP
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
c6c18be3 22#include <fcntl.h>
e41969e3 23#include <fnmatch.h>
8c6db833 24
9eb977db 25#include "path-util.h"
9444b1f2 26#include "special.h"
4ad49000
LP
27#include "cgroup-util.h"
28#include "cgroup.h"
8e274523 29
/* Length of the CFS period written to cpu.cfs_period_us (100ms, expressed
 * in usec). CPU quotas are scaled to this period before being written. */
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
31
4ad49000
LP
32void cgroup_context_init(CGroupContext *c) {
33 assert(c);
34
35 /* Initialize everything to the kernel defaults, assuming the
36 * structure is preinitialized to 0 */
37
db785129
LP
38 c->cpu_shares = (unsigned long) -1;
39 c->startup_cpu_shares = (unsigned long) -1;
ddca82ac 40 c->memory_limit = (uint64_t) -1;
db785129
LP
41 c->blockio_weight = (unsigned long) -1;
42 c->startup_blockio_weight = (unsigned long) -1;
b2f8b02e 43
3a43da28 44 c->cpu_quota_per_sec_usec = USEC_INFINITY;
4ad49000 45}
8e274523 46
4ad49000
LP
47void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
48 assert(c);
49 assert(a);
50
71fda00f 51 LIST_REMOVE(device_allow, c->device_allow, a);
4ad49000
LP
52 free(a->path);
53 free(a);
54}
55
56void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
57 assert(c);
58 assert(w);
59
71fda00f 60 LIST_REMOVE(device_weights, c->blockio_device_weights, w);
4ad49000
LP
61 free(w->path);
62 free(w);
63}
64
65void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
66 assert(c);
8e274523 67 assert(b);
8e274523 68
71fda00f 69 LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
4ad49000
LP
70 free(b->path);
71 free(b);
72}
73
74void cgroup_context_done(CGroupContext *c) {
75 assert(c);
76
77 while (c->blockio_device_weights)
78 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
79
80 while (c->blockio_device_bandwidths)
81 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
82
83 while (c->device_allow)
84 cgroup_context_free_device_allow(c, c->device_allow);
85}
86
87void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
88 CGroupBlockIODeviceBandwidth *b;
89 CGroupBlockIODeviceWeight *w;
90 CGroupDeviceAllow *a;
9a054909 91 char u[FORMAT_TIMESPAN_MAX];
4ad49000
LP
92
93 assert(c);
94 assert(f);
95
96 prefix = strempty(prefix);
97
98 fprintf(f,
99 "%sCPUAccounting=%s\n"
100 "%sBlockIOAccounting=%s\n"
101 "%sMemoryAccounting=%s\n"
102 "%sCPUShares=%lu\n"
95ae05c0 103 "%sStartupCPUShares=%lu\n"
b2f8b02e 104 "%sCPUQuotaPerSecSec=%s\n"
112a7f46 105 "%sBlockIOWeight=%lu\n"
95ae05c0 106 "%sStartupBlockIOWeight=%lu\n"
4ad49000 107 "%sMemoryLimit=%" PRIu64 "\n"
4ad49000
LP
108 "%sDevicePolicy=%s\n",
109 prefix, yes_no(c->cpu_accounting),
110 prefix, yes_no(c->blockio_accounting),
111 prefix, yes_no(c->memory_accounting),
112 prefix, c->cpu_shares,
95ae05c0 113 prefix, c->startup_cpu_shares,
9a054909 114 prefix, strna(format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1)),
4ad49000 115 prefix, c->blockio_weight,
95ae05c0 116 prefix, c->startup_blockio_weight,
4ad49000 117 prefix, c->memory_limit,
4ad49000
LP
118 prefix, cgroup_device_policy_to_string(c->device_policy));
119
120 LIST_FOREACH(device_allow, a, c->device_allow)
121 fprintf(f,
122 "%sDeviceAllow=%s %s%s%s\n",
123 prefix,
124 a->path,
125 a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
126
127 LIST_FOREACH(device_weights, w, c->blockio_device_weights)
128 fprintf(f,
8e7076ca 129 "%sBlockIODeviceWeight=%s %lu",
4ad49000
LP
130 prefix,
131 w->path,
132 w->weight);
133
134 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
135 char buf[FORMAT_BYTES_MAX];
136
137 fprintf(f,
138 "%s%s=%s %s\n",
139 prefix,
140 b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
141 b->path,
142 format_bytes(buf, sizeof(buf), b->bandwidth));
143 }
144}
145
/* Resolves the path p to a block device number, stored in *dev.
 *
 * If p is a block device node its own device number is used. Otherwise the
 * device of the file system p lives on is used, and block_get_whole_disk()
 * is given the chance to replace it (its result is ignored).
 *
 * Returns 0 on success, -errno if stat() fails, or -ENODEV if p is not a
 * block device and the major number of its backing device is 0. */
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;

        assert(p);
        assert(dev);

        if (stat(p, &st) < 0) {
                log_warning("Couldn't stat device %s: %m", p);
                return -errno;
        }

        /* A block device node: take its device number directly */
        if (S_ISBLK(st.st_mode)) {
                *dev = st.st_rdev;
                return 0;
        }

        /* Not a device node and no usable backing device either */
        if (major(st.st_dev) == 0) {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        /* A regular file or directory: use the block device it is stored
         * on, and if that is a partition try to get the originating
         * block device */
        *dev = st.st_dev;
        block_get_whole_disk(*dev, dev);

        return 0;
}
176
4ad49000
LP
177static int whitelist_device(const char *path, const char *node, const char *acc) {
178 char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
179 struct stat st;
8c6db833 180 int r;
8e274523 181
4ad49000
LP
182 assert(path);
183 assert(acc);
8e274523 184
4ad49000
LP
185 if (stat(node, &st) < 0) {
186 log_warning("Couldn't stat device %s", node);
187 return -errno;
188 }
189
190 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
191 log_warning("%s is not a device.", node);
192 return -ENODEV;
193 }
194
195 sprintf(buf,
196 "%c %u:%u %s",
197 S_ISCHR(st.st_mode) ? 'c' : 'b',
198 major(st.st_rdev), minor(st.st_rdev),
199 acc);
200
201 r = cg_set_attribute("devices", path, "devices.allow", buf);
6b2f67b3 202 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set devices.allow on %s: %s", path, strerror(-r));
4ad49000
LP
203
204 return r;
8e274523
LP
205}
206
90060676
LP
207static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
208 _cleanup_fclose_ FILE *f = NULL;
209 char line[LINE_MAX];
210 bool good = false;
211 int r;
212
213 assert(path);
214 assert(acc);
215 assert(type == 'b' || type == 'c');
216
217 f = fopen("/proc/devices", "re");
218 if (!f) {
219 log_warning("Cannot open /proc/devices to resolve %s (%c): %m", name, type);
220 return -errno;
221 }
222
223 FOREACH_LINE(line, f, goto fail) {
224 char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
225 unsigned maj;
226
227 truncate_nl(line);
228
229 if (type == 'c' && streq(line, "Character devices:")) {
230 good = true;
231 continue;
232 }
233
234 if (type == 'b' && streq(line, "Block devices:")) {
235 good = true;
236 continue;
237 }
238
239 if (isempty(line)) {
240 good = false;
241 continue;
242 }
243
244 if (!good)
245 continue;
246
247 p = strstrip(line);
248
249 w = strpbrk(p, WHITESPACE);
250 if (!w)
251 continue;
252 *w = 0;
253
254 r = safe_atou(p, &maj);
255 if (r < 0)
256 continue;
257 if (maj <= 0)
258 continue;
259
260 w++;
261 w += strspn(w, WHITESPACE);
e41969e3
LP
262
263 if (fnmatch(name, w, 0) != 0)
90060676
LP
264 continue;
265
266 sprintf(buf,
267 "%c %u:* %s",
268 type,
269 maj,
270 acc);
271
272 r = cg_set_attribute("devices", path, "devices.allow", buf);
6b2f67b3 273 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set devices.allow on %s: %s", path, strerror(-r));
90060676
LP
274 }
275
276 return 0;
277
278fail:
279 log_warning("Failed to read /proc/devices: %m");
280 return -errno;
281}
282
db785129 283void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path, ManagerState state) {
01efdf13 284 bool is_root;
4ad49000
LP
285 int r;
286
287 assert(c);
288 assert(path);
8e274523 289
4ad49000
LP
290 if (mask == 0)
291 return;
8e274523 292
01efdf13
LP
293 /* Some cgroup attributes are not support on the root cgroup,
294 * hence silently ignore */
295 is_root = isempty(path) || path_equal(path, "/");
296
297 if ((mask & CGROUP_CPU) && !is_root) {
b2f8b02e 298 char buf[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t)) + 1];
8e274523 299
db785129
LP
300 sprintf(buf, "%lu\n",
301 state == MANAGER_STARTING && c->startup_cpu_shares != (unsigned long) -1 ? c->startup_cpu_shares :
302 c->cpu_shares != (unsigned long) -1 ? c->cpu_shares : 1024);
4ad49000 303 r = cg_set_attribute("cpu", path, "cpu.shares", buf);
6b2f67b3 304 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set cpu.shares on %s: %s", path, strerror(-r));
b2f8b02e 305
9a054909 306 sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
b2f8b02e 307 r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
6b2f67b3 308 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set cpu.cfs_period_us on %s: %s", path, strerror(-r));
b2f8b02e 309
3a43da28 310 if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
9a054909 311 sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
b2f8b02e
LP
312 r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
313 } else
314 r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
6b2f67b3 315 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set cpu.cfs_quota_us on %s: %s", path, strerror(-r));
4ad49000
LP
316 }
317
318 if (mask & CGROUP_BLKIO) {
319 char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
320 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
321 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
322 CGroupBlockIODeviceWeight *w;
323 CGroupBlockIODeviceBandwidth *b;
324
01efdf13 325 if (!is_root) {
db785129
LP
326 sprintf(buf, "%lu\n", state == MANAGER_STARTING && c->startup_blockio_weight != (unsigned long) -1 ? c->startup_blockio_weight :
327 c->blockio_weight != (unsigned long) -1 ? c->blockio_weight : 1000);
01efdf13 328 r = cg_set_attribute("blkio", path, "blkio.weight", buf);
6b2f67b3 329 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set blkio.weight on %s: %s", path, strerror(-r));
4ad49000 330
01efdf13
LP
331 /* FIXME: no way to reset this list */
332 LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
333 dev_t dev;
4ad49000 334
01efdf13
LP
335 r = lookup_blkio_device(w->path, &dev);
336 if (r < 0)
337 continue;
8e274523 338
01efdf13
LP
339 sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
340 r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
6b2f67b3 341 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
01efdf13 342 }
4ad49000
LP
343 }
344
345 /* FIXME: no way to reset this list */
346 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
347 const char *a;
348 dev_t dev;
349
350 r = lookup_blkio_device(b->path, &dev);
351 if (r < 0)
352 continue;
353
354 a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
355
356 sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
357 r = cg_set_attribute("blkio", path, a, buf);
6b2f67b3 358 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set %s on %s: %s", a, path, strerror(-r));
d686d8a9 359 }
8e274523
LP
360 }
361
4ad49000 362 if (mask & CGROUP_MEMORY) {
6a94f2e9 363 if (c->memory_limit != (uint64_t) -1) {
e58cec11
LP
364 char buf[DECIMAL_STR_MAX(uint64_t) + 1];
365
6a94f2e9
G
366 sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
367 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
368 } else
369 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
8e274523 370
6b2f67b3 371 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
4ad49000 372 }
8e274523 373
01efdf13 374 if ((mask & CGROUP_DEVICE) && !is_root) {
4ad49000 375 CGroupDeviceAllow *a;
8e274523 376
4ad49000
LP
377 if (c->device_allow || c->device_policy != CGROUP_AUTO)
378 r = cg_set_attribute("devices", path, "devices.deny", "a");
379 else
380 r = cg_set_attribute("devices", path, "devices.allow", "a");
6b2f67b3 381 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to reset devices.list on %s: %s", path, strerror(-r));
fb385181 382
4ad49000
LP
383 if (c->device_policy == CGROUP_CLOSED ||
384 (c->device_policy == CGROUP_AUTO && c->device_allow)) {
385 static const char auto_devices[] =
7d711efb
LP
386 "/dev/null\0" "rwm\0"
387 "/dev/zero\0" "rwm\0"
388 "/dev/full\0" "rwm\0"
389 "/dev/random\0" "rwm\0"
390 "/dev/urandom\0" "rwm\0"
391 "/dev/tty\0" "rwm\0"
392 "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
4ad49000
LP
393
394 const char *x, *y;
395
396 NULSTR_FOREACH_PAIR(x, y, auto_devices)
397 whitelist_device(path, x, y);
7d711efb
LP
398
399 whitelist_major(path, "pts", 'c', "rw");
400 whitelist_major(path, "kdbus", 'c', "rw");
401 whitelist_major(path, "kdbus/*", 'c', "rw");
4ad49000
LP
402 }
403
404 LIST_FOREACH(device_allow, a, c->device_allow) {
405 char acc[4];
406 unsigned k = 0;
407
408 if (a->r)
409 acc[k++] = 'r';
410 if (a->w)
411 acc[k++] = 'w';
412 if (a->m)
413 acc[k++] = 'm';
fb385181 414
4ad49000
LP
415 if (k == 0)
416 continue;
fb385181 417
4ad49000 418 acc[k++] = 0;
90060676
LP
419
420 if (startswith(a->path, "/dev/"))
421 whitelist_device(path, a->path, acc);
422 else if (startswith(a->path, "block-"))
423 whitelist_major(path, a->path + 6, 'b', acc);
424 else if (startswith(a->path, "char-"))
425 whitelist_major(path, a->path + 5, 'c', acc);
426 else
427 log_debug("Ignoring device %s while writing cgroup attribute.", a->path);
4ad49000
LP
428 }
429 }
fb385181
LP
430}
431
db785129 432CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
4ad49000 433 CGroupControllerMask mask = 0;
8e274523 434
4ad49000 435 /* Figure out which controllers we need */
8e274523 436
b2f8b02e 437 if (c->cpu_accounting ||
db785129
LP
438 c->cpu_shares != (unsigned long) -1 ||
439 c->startup_cpu_shares != (unsigned long) -1 ||
3a43da28 440 c->cpu_quota_per_sec_usec != USEC_INFINITY)
4ad49000 441 mask |= CGROUP_CPUACCT | CGROUP_CPU;
ecedd90f 442
4ad49000 443 if (c->blockio_accounting ||
db785129
LP
444 c->blockio_weight != (unsigned long) -1 ||
445 c->startup_blockio_weight != (unsigned long) -1 ||
4ad49000 446 c->blockio_device_weights ||
db785129 447 c->blockio_device_bandwidths)
4ad49000 448 mask |= CGROUP_BLKIO;
ecedd90f 449
4ad49000 450 if (c->memory_accounting ||
ddca82ac 451 c->memory_limit != (uint64_t) -1)
4ad49000 452 mask |= CGROUP_MEMORY;
8e274523 453
4ad49000
LP
454 if (c->device_allow || c->device_policy != CGROUP_AUTO)
455 mask |= CGROUP_DEVICE;
456
457 return mask;
8e274523
LP
458}
459
bc432dc7 460CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
4ad49000 461 CGroupContext *c;
8e274523 462
4ad49000
LP
463 c = unit_get_cgroup_context(u);
464 if (!c)
465 return 0;
8e274523 466
db785129 467 return cgroup_context_get_mask(c);
8e274523
LP
468}
469
bc432dc7 470CGroupControllerMask unit_get_members_mask(Unit *u) {
4ad49000 471 assert(u);
bc432dc7
LP
472
473 if (u->cgroup_members_mask_valid)
474 return u->cgroup_members_mask;
475
476 u->cgroup_members_mask = 0;
477
478 if (u->type == UNIT_SLICE) {
479 Unit *member;
480 Iterator i;
481
482 SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {
483
484 if (member == u)
485 continue;
486
d4fdc205 487 if (UNIT_DEREF(member->slice) != u)
bc432dc7
LP
488 continue;
489
490 u->cgroup_members_mask |=
491 unit_get_cgroup_mask(member) |
492 unit_get_members_mask(member);
493 }
494 }
495
496 u->cgroup_members_mask_valid = true;
6414b7c9 497 return u->cgroup_members_mask;
246aa6dd
LP
498}
499
bc432dc7 500CGroupControllerMask unit_get_siblings_mask(Unit *u) {
4ad49000 501 assert(u);
246aa6dd 502
bc432dc7 503 if (UNIT_ISSET(u->slice))
637f421e 504 return unit_get_members_mask(UNIT_DEREF(u->slice));
4ad49000 505
637f421e 506 return unit_get_cgroup_mask(u) | unit_get_members_mask(u);
246aa6dd
LP
507}
508
bc432dc7 509CGroupControllerMask unit_get_target_mask(Unit *u) {
6414b7c9
DS
510 CGroupControllerMask mask;
511
512 mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
513 mask &= u->manager->cgroup_supported;
514
515 return mask;
516}
517
518/* Recurse from a unit up through its containing slices, propagating
519 * mask bits upward. A unit is also member of itself. */
bc432dc7
LP
520void unit_update_cgroup_members_masks(Unit *u) {
521 CGroupControllerMask m;
522 bool more;
523
524 assert(u);
525
526 /* Calculate subtree mask */
527 m = unit_get_cgroup_mask(u) | unit_get_members_mask(u);
528
529 /* See if anything changed from the previous invocation. If
530 * not, we're done. */
531 if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask)
532 return;
533
534 more =
535 u->cgroup_subtree_mask_valid &&
536 ((m & ~u->cgroup_subtree_mask) != 0) &&
537 ((~m & u->cgroup_subtree_mask) == 0);
538
539 u->cgroup_subtree_mask = m;
540 u->cgroup_subtree_mask_valid = true;
541
6414b7c9
DS
542 if (UNIT_ISSET(u->slice)) {
543 Unit *s = UNIT_DEREF(u->slice);
bc432dc7
LP
544
545 if (more)
546 /* There's more set now than before. We
547 * propagate the new mask to the parent's mask
548 * (not caring if it actually was valid or
549 * not). */
550
551 s->cgroup_members_mask |= m;
552
553 else
554 /* There's less set now than before (or we
555 * don't know), we need to recalculate
556 * everything, so let's invalidate the
557 * parent's members mask */
558
559 s->cgroup_members_mask_valid = false;
560
561 /* And now make sure that this change also hits our
562 * grandparents */
563 unit_update_cgroup_members_masks(s);
6414b7c9
DS
564 }
565}
566
03b90d4b
LP
567static const char *migrate_callback(CGroupControllerMask mask, void *userdata) {
568 Unit *u = userdata;
569
570 assert(mask != 0);
571 assert(u);
572
573 while (u) {
574 if (u->cgroup_path &&
575 u->cgroup_realized &&
576 (u->cgroup_realized_mask & mask) == mask)
577 return u->cgroup_path;
578
579 u = UNIT_DEREF(u->slice);
580 }
581
582 return NULL;
583}
584
4ad49000 585static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
03b90d4b 586 _cleanup_free_ char *path = NULL;
bc432dc7 587 int r;
64747e2d 588
4ad49000 589 assert(u);
64747e2d 590
4ad49000
LP
591 path = unit_default_cgroup_path(u);
592 if (!path)
a94042fa 593 return log_oom();
64747e2d 594
0a1eb06d 595 r = hashmap_put(u->manager->cgroup_unit, path, u);
03b90d4b
LP
596 if (r < 0) {
597 log_error(r == -EEXIST ? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path, strerror(-r));
0a1eb06d 598 return r;
b58b8e11 599 }
03b90d4b 600 if (r > 0) {
b58b8e11 601 u->cgroup_path = path;
a94042fa 602 path = NULL;
b58b8e11
HH
603 }
604
03b90d4b
LP
605 /* First, create our own group */
606 r = cg_create_everywhere(u->manager->cgroup_supported, mask, u->cgroup_path);
607 if (r < 0) {
608 log_error("Failed to create cgroup %s: %s", u->cgroup_path, strerror(-r));
609 return r;
610 }
611
612 /* Keep track that this is now realized */
4ad49000 613 u->cgroup_realized = true;
bc432dc7 614 u->cgroup_realized_mask = mask;
4ad49000 615
03b90d4b
LP
616 /* Then, possibly move things over */
617 r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);
618 if (r < 0)
619 log_warning("Failed to migrate cgroup from to %s: %s", u->cgroup_path, strerror(-r));
620
64747e2d
LP
621 return 0;
622}
623
6414b7c9 624static bool unit_has_mask_realized(Unit *u, CGroupControllerMask mask) {
bc432dc7
LP
625 assert(u);
626
627 return u->cgroup_realized && u->cgroup_realized_mask == mask;
6414b7c9
DS
628}
629
630/* Check if necessary controllers and attributes for a unit are in place.
631 *
632 * If so, do nothing.
633 * If not, create paths, move processes over, and set attributes.
634 *
635 * Returns 0 on success and < 0 on failure. */
db785129 636static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
4ad49000 637 CGroupControllerMask mask;
6414b7c9 638 int r;
64747e2d 639
4ad49000 640 assert(u);
64747e2d 641
4ad49000 642 if (u->in_cgroup_queue) {
71fda00f 643 LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
644 u->in_cgroup_queue = false;
645 }
64747e2d 646
6414b7c9 647 mask = unit_get_target_mask(u);
64747e2d 648
6414b7c9 649 if (unit_has_mask_realized(u, mask))
0a1eb06d 650 return 0;
64747e2d 651
4ad49000 652 /* First, realize parents */
6414b7c9 653 if (UNIT_ISSET(u->slice)) {
db785129 654 r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
6414b7c9
DS
655 if (r < 0)
656 return r;
657 }
4ad49000
LP
658
659 /* And then do the real work */
6414b7c9
DS
660 r = unit_create_cgroups(u, mask);
661 if (r < 0)
662 return r;
663
664 /* Finally, apply the necessary attributes. */
db785129 665 cgroup_context_apply(unit_get_cgroup_context(u), mask, u->cgroup_path, state);
6414b7c9
DS
666
667 return 0;
64747e2d
LP
668}
669
4ad49000 670static void unit_add_to_cgroup_queue(Unit *u) {
ecedd90f 671
4ad49000
LP
672 if (u->in_cgroup_queue)
673 return;
8e274523 674
71fda00f 675 LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
676 u->in_cgroup_queue = true;
677}
8c6db833 678
4ad49000 679unsigned manager_dispatch_cgroup_queue(Manager *m) {
db785129 680 ManagerState state;
4ad49000 681 unsigned n = 0;
db785129 682 Unit *i;
6414b7c9 683 int r;
ecedd90f 684
db785129
LP
685 state = manager_state(m);
686
4ad49000
LP
687 while ((i = m->cgroup_queue)) {
688 assert(i->in_cgroup_queue);
ecedd90f 689
db785129 690 r = unit_realize_cgroup_now(i, state);
6414b7c9
DS
691 if (r < 0)
692 log_warning("Failed to realize cgroups for queued unit %s: %s", i->id, strerror(-r));
0a1eb06d 693
4ad49000
LP
694 n++;
695 }
ecedd90f 696
4ad49000 697 return n;
8e274523
LP
698}
699
4ad49000
LP
700static void unit_queue_siblings(Unit *u) {
701 Unit *slice;
ca949c9d 702
4ad49000
LP
703 /* This adds the siblings of the specified unit and the
704 * siblings of all parent units to the cgroup queue. (But
705 * neither the specified unit itself nor the parents.) */
706
707 while ((slice = UNIT_DEREF(u->slice))) {
708 Iterator i;
709 Unit *m;
8f53a7b8 710
4ad49000
LP
711 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
712 if (m == u)
713 continue;
8e274523 714
6414b7c9
DS
715 /* Skip units that have a dependency on the slice
716 * but aren't actually in it. */
4ad49000 717 if (UNIT_DEREF(m->slice) != slice)
50159e6a 718 continue;
8e274523 719
6414b7c9
DS
720 /* No point in doing cgroup application for units
721 * without active processes. */
722 if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
723 continue;
724
725 /* If the unit doesn't need any new controllers
726 * and has current ones realized, it doesn't need
727 * any changes. */
728 if (unit_has_mask_realized(m, unit_get_target_mask(m)))
729 continue;
730
4ad49000 731 unit_add_to_cgroup_queue(m);
50159e6a
LP
732 }
733
4ad49000 734 u = slice;
8e274523 735 }
4ad49000
LP
736}
737
0a1eb06d 738int unit_realize_cgroup(Unit *u) {
4ad49000
LP
739 CGroupContext *c;
740
741 assert(u);
742
743 c = unit_get_cgroup_context(u);
744 if (!c)
0a1eb06d 745 return 0;
8e274523 746
4ad49000
LP
747 /* So, here's the deal: when realizing the cgroups for this
748 * unit, we need to first create all parents, but there's more
749 * actually: for the weight-based controllers we also need to
750 * make sure that all our siblings (i.e. units that are in the
73e231ab 751 * same slice as we are) have cgroups, too. Otherwise, things
4ad49000
LP
752 * would become very uneven as each of their processes would
753 * get as much resources as all our group together. This call
754 * will synchronously create the parent cgroups, but will
755 * defer work on the siblings to the next event loop
756 * iteration. */
ca949c9d 757
4ad49000
LP
758 /* Add all sibling slices to the cgroup queue. */
759 unit_queue_siblings(u);
760
6414b7c9 761 /* And realize this one now (and apply the values) */
db785129 762 return unit_realize_cgroup_now(u, manager_state(u->manager));
8e274523
LP
763}
764
4ad49000 765void unit_destroy_cgroup(Unit *u) {
8e274523
LP
766 int r;
767
4ad49000 768 assert(u);
8e274523 769
4ad49000
LP
770 if (!u->cgroup_path)
771 return;
8e274523 772
13b84ec7 773 r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE));
4ad49000 774 if (r < 0)
376dd21d 775 log_debug("Failed to destroy cgroup %s: %s", u->cgroup_path, strerror(-r));
8e274523 776
0a1eb06d
LP
777 hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
778
4ad49000
LP
779 free(u->cgroup_path);
780 u->cgroup_path = NULL;
781 u->cgroup_realized = false;
bc432dc7 782 u->cgroup_realized_mask = 0;
0a1eb06d 783
8e274523
LP
784}
785
4ad49000
LP
786pid_t unit_search_main_pid(Unit *u) {
787 _cleanup_fclose_ FILE *f = NULL;
788 pid_t pid = 0, npid, mypid;
789
790 assert(u);
791
792 if (!u->cgroup_path)
793 return 0;
794
795 if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
796 return 0;
797
798 mypid = getpid();
799 while (cg_read_pid(f, &npid) > 0) {
800 pid_t ppid;
801
802 if (npid == pid)
803 continue;
8e274523 804
4ad49000
LP
805 /* Ignore processes that aren't our kids */
806 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
807 continue;
8e274523 808
4ad49000
LP
809 if (pid != 0) {
810 /* Dang, there's more than one daemonized PID
811 in this group, so we don't know what process
812 is the main process. */
813 pid = 0;
814 break;
815 }
8e274523 816
4ad49000 817 pid = npid;
8e274523
LP
818 }
819
4ad49000 820 return pid;
8e274523
LP
821}
822
8e274523 823int manager_setup_cgroup(Manager *m) {
9444b1f2 824 _cleanup_free_ char *path = NULL;
8e274523 825 int r;
8e274523
LP
826
827 assert(m);
828
35d2e7ec 829 /* 1. Determine hierarchy */
9444b1f2
LP
830 free(m->cgroup_root);
831 m->cgroup_root = NULL;
832
833 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
9156e799 834 if (r < 0) {
12235040 835 log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
a32360f1 836 return r;
12235040 837 }
8e274523 838
15c60e99
LP
839 /* LEGACY: Already in /system.slice? If so, let's cut this
840 * off. This is to support live upgrades from older systemd
841 * versions where PID 1 was moved there. */
9444b1f2 842 if (m->running_as == SYSTEMD_SYSTEM) {
0d8c31ff
ZJS
843 char *e;
844
9444b1f2 845 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
15c60e99
LP
846 if (!e)
847 e = endswith(m->cgroup_root, "/system");
9444b1f2
LP
848 if (e)
849 *e = 0;
0baf24dd 850 }
7ccfb64a 851
9444b1f2
LP
852 /* And make sure to store away the root value without trailing
853 * slash, even for the root dir, so that we can easily prepend
854 * it everywhere. */
855 if (streq(m->cgroup_root, "/"))
856 m->cgroup_root[0] = 0;
8e274523 857
35d2e7ec 858 /* 2. Show data */
9444b1f2 859 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
3474ae3c 860 if (r < 0) {
12235040 861 log_error("Cannot find cgroup mount point: %s", strerror(-r));
a32360f1 862 return r;
12235040 863 }
8e274523 864
c6c18be3 865 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
0d8c31ff 866 if (!m->test_run) {
c6c18be3 867
0d8c31ff
ZJS
868 /* 3. Install agent */
869 if (m->running_as == SYSTEMD_SYSTEM) {
870 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
871 if (r < 0)
872 log_warning("Failed to install release agent, ignoring: %s", strerror(-r));
873 else if (r > 0)
874 log_debug("Installed release agent.");
875 else
876 log_debug("Release agent already installed.");
877 }
8e274523 878
0d8c31ff
ZJS
879 /* 4. Make sure we are in the root cgroup */
880 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
881 if (r < 0) {
882 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r));
883 return r;
884 }
c6c18be3 885
0d8c31ff
ZJS
886 /* 5. And pin it, so that it cannot be unmounted */
887 safe_close(m->pin_cgroupfs_fd);
c6c18be3 888
0d8c31ff
ZJS
889 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
890 if (m->pin_cgroupfs_fd < 0) {
891 log_error("Failed to open pin file: %m");
892 return -errno;
893 }
894
895 /* 6. Always enable hierarchial support if it exists... */
896 cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
c6c18be3
LP
897 }
898
0d8c31ff 899 /* 7. Figure out which controllers are supported */
4ad49000 900 m->cgroup_supported = cg_mask_supported();
9156e799 901
a32360f1 902 return 0;
8e274523
LP
903}
904
c6c18be3 905void manager_shutdown_cgroup(Manager *m, bool delete) {
8e274523
LP
906 assert(m);
907
9444b1f2
LP
908 /* We can't really delete the group, since we are in it. But
909 * let's trim it. */
910 if (delete && m->cgroup_root)
911 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
8e274523 912
03e334a1 913 m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd);
c6c18be3 914
9444b1f2
LP
915 free(m->cgroup_root);
916 m->cgroup_root = NULL;
8e274523
LP
917}
918
4ad49000 919Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
acb14d31 920 char *p;
4ad49000 921 Unit *u;
acb14d31
LP
922
923 assert(m);
924 assert(cgroup);
acb14d31 925
4ad49000
LP
926 u = hashmap_get(m->cgroup_unit, cgroup);
927 if (u)
928 return u;
acb14d31 929
8e70580b 930 p = strdupa(cgroup);
acb14d31
LP
931 for (;;) {
932 char *e;
933
934 e = strrchr(p, '/');
4ad49000
LP
935 if (e == p || !e)
936 return NULL;
acb14d31
LP
937
938 *e = 0;
939
4ad49000
LP
940 u = hashmap_get(m->cgroup_unit, p);
941 if (u)
942 return u;
acb14d31
LP
943 }
944}
945
4ad49000
LP
946Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
947 _cleanup_free_ char *cgroup = NULL;
acb14d31 948 int r;
8e274523 949
8c47c732
LP
950 assert(m);
951
952 if (pid <= 1)
953 return NULL;
954
4ad49000
LP
955 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
956 if (r < 0)
6dde1f33
LP
957 return NULL;
958
4ad49000 959 return manager_get_unit_by_cgroup(m, cgroup);
6dde1f33 960}
4fbf50b3 961
4ad49000
LP
962int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
963 Unit *u;
964 int r;
4fbf50b3 965
4ad49000
LP
966 assert(m);
967 assert(cgroup);
4fbf50b3 968
4ad49000 969 u = manager_get_unit_by_cgroup(m, cgroup);
b56c28c3 970 if (u) {
06025d91
LP
971 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
972 if (r > 0) {
973 if (UNIT_VTABLE(u)->notify_cgroup_empty)
974 UNIT_VTABLE(u)->notify_cgroup_empty(u);
b56c28c3 975
06025d91
LP
976 unit_add_to_gc_queue(u);
977 }
b56c28c3 978 }
2633eb83 979
4ad49000 980 return 0;
4fbf50b3
LP
981}
982
4ad49000
LP
983static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
984 [CGROUP_AUTO] = "auto",
985 [CGROUP_CLOSED] = "closed",
986 [CGROUP_STRICT] = "strict",
987};
4fbf50b3 988
4ad49000 989DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);