]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/cgroup.c
hwdb: fix a typo
[thirdparty/systemd.git] / src / core / cgroup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
4ad49000 6 Copyright 2013 Lennart Poettering
8e274523
LP
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
c6c18be3 22#include <fcntl.h>
e41969e3 23#include <fnmatch.h>
8c6db833 24
9eb977db 25#include "path-util.h"
9444b1f2 26#include "special.h"
4ad49000
LP
27#include "cgroup-util.h"
28#include "cgroup.h"
8e274523 29
9a054909
LP
/* Length of one CPU quota period (100ms, expressed in usec). This value is
 * written to cpu.cfs_period_us, and the configured per-second quota is
 * scaled down to it before being written to cpu.cfs_quota_us. */
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
31
4ad49000
LP
32void cgroup_context_init(CGroupContext *c) {
33 assert(c);
34
35 /* Initialize everything to the kernel defaults, assuming the
36 * structure is preinitialized to 0 */
37
db785129
LP
38 c->cpu_shares = (unsigned long) -1;
39 c->startup_cpu_shares = (unsigned long) -1;
ddca82ac 40 c->memory_limit = (uint64_t) -1;
db785129
LP
41 c->blockio_weight = (unsigned long) -1;
42 c->startup_blockio_weight = (unsigned long) -1;
b2f8b02e 43
3a43da28 44 c->cpu_quota_per_sec_usec = USEC_INFINITY;
4ad49000 45}
8e274523 46
4ad49000
LP
47void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
48 assert(c);
49 assert(a);
50
71fda00f 51 LIST_REMOVE(device_allow, c->device_allow, a);
4ad49000
LP
52 free(a->path);
53 free(a);
54}
55
56void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
57 assert(c);
58 assert(w);
59
71fda00f 60 LIST_REMOVE(device_weights, c->blockio_device_weights, w);
4ad49000
LP
61 free(w->path);
62 free(w);
63}
64
65void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
66 assert(c);
8e274523 67 assert(b);
8e274523 68
71fda00f 69 LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
4ad49000
LP
70 free(b->path);
71 free(b);
72}
73
74void cgroup_context_done(CGroupContext *c) {
75 assert(c);
76
77 while (c->blockio_device_weights)
78 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
79
80 while (c->blockio_device_bandwidths)
81 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
82
83 while (c->device_allow)
84 cgroup_context_free_device_allow(c, c->device_allow);
85}
86
87void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
88 CGroupBlockIODeviceBandwidth *b;
89 CGroupBlockIODeviceWeight *w;
90 CGroupDeviceAllow *a;
9a054909 91 char u[FORMAT_TIMESPAN_MAX];
4ad49000
LP
92
93 assert(c);
94 assert(f);
95
96 prefix = strempty(prefix);
97
98 fprintf(f,
99 "%sCPUAccounting=%s\n"
100 "%sBlockIOAccounting=%s\n"
101 "%sMemoryAccounting=%s\n"
102 "%sCPUShares=%lu\n"
95ae05c0 103 "%sStartupCPUShares=%lu\n"
b2f8b02e 104 "%sCPUQuotaPerSecSec=%s\n"
112a7f46 105 "%sBlockIOWeight=%lu\n"
95ae05c0 106 "%sStartupBlockIOWeight=%lu\n"
4ad49000 107 "%sMemoryLimit=%" PRIu64 "\n"
a931ad47
LP
108 "%sDevicePolicy=%s\n"
109 "%sDelegate=%s\n",
4ad49000
LP
110 prefix, yes_no(c->cpu_accounting),
111 prefix, yes_no(c->blockio_accounting),
112 prefix, yes_no(c->memory_accounting),
113 prefix, c->cpu_shares,
95ae05c0 114 prefix, c->startup_cpu_shares,
b1d6dcf5 115 prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
4ad49000 116 prefix, c->blockio_weight,
95ae05c0 117 prefix, c->startup_blockio_weight,
4ad49000 118 prefix, c->memory_limit,
a931ad47
LP
119 prefix, cgroup_device_policy_to_string(c->device_policy),
120 prefix, yes_no(c->delegate));
4ad49000
LP
121
122 LIST_FOREACH(device_allow, a, c->device_allow)
123 fprintf(f,
124 "%sDeviceAllow=%s %s%s%s\n",
125 prefix,
126 a->path,
127 a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
128
129 LIST_FOREACH(device_weights, w, c->blockio_device_weights)
130 fprintf(f,
8e7076ca 131 "%sBlockIODeviceWeight=%s %lu",
4ad49000
LP
132 prefix,
133 w->path,
134 w->weight);
135
136 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
137 char buf[FORMAT_BYTES_MAX];
138
139 fprintf(f,
140 "%s%s=%s %s\n",
141 prefix,
142 b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
143 b->path,
144 format_bytes(buf, sizeof(buf), b->bandwidth));
145 }
146}
147
/* Determines the block device number to use for a per-device blkio
 * setting.
 *
 * p:   path of either a block device node or of any other file
 * dev: receives the device number on success
 *
 * Returns 0 on success, a negative errno-style value if the path cannot
 * be stat()ed, or -ENODEV if no device can be derived from it. Failures
 * are logged as warnings here. */
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;

        assert(p);
        assert(dev);

        if (stat(p, &st) < 0)
                return log_warning_errno(errno, "Couldn't stat device %s: %m", p);

        /* A block device node: use the device it refers to */
        if (S_ISBLK(st.st_mode)) {
                *dev = st.st_rdev;
                return 0;
        }

        /* Not a device node, and the file system it lives on has no
         * major number we could use either */
        if (major(st.st_dev) == 0) {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        /* Not a device node: use the block device this file is stored on */
        *dev = st.st_dev;

        /* If this is a partition, try to get the originating block
         * device. The result of this call is intentionally not checked. */
        block_get_whole_disk(*dev, dev);

        return 0;
}
176
4ad49000
LP
177static int whitelist_device(const char *path, const char *node, const char *acc) {
178 char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
179 struct stat st;
8c6db833 180 int r;
8e274523 181
4ad49000
LP
182 assert(path);
183 assert(acc);
8e274523 184
4ad49000
LP
185 if (stat(node, &st) < 0) {
186 log_warning("Couldn't stat device %s", node);
187 return -errno;
188 }
189
190 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
191 log_warning("%s is not a device.", node);
192 return -ENODEV;
193 }
194
195 sprintf(buf,
196 "%c %u:%u %s",
197 S_ISCHR(st.st_mode) ? 'c' : 'b',
198 major(st.st_rdev), minor(st.st_rdev),
199 acc);
200
201 r = cg_set_attribute("devices", path, "devices.allow", buf);
1aeab12b
LP
202 if (r < 0)
203 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set devices.allow on %s: %s", path, strerror(-r));
4ad49000
LP
204
205 return r;
8e274523
LP
206}
207
90060676
LP
208static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
209 _cleanup_fclose_ FILE *f = NULL;
210 char line[LINE_MAX];
211 bool good = false;
212 int r;
213
214 assert(path);
215 assert(acc);
216 assert(type == 'b' || type == 'c');
217
218 f = fopen("/proc/devices", "re");
4a62c710
MS
219 if (!f)
220 return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s (%c): %m", name, type);
90060676
LP
221
222 FOREACH_LINE(line, f, goto fail) {
223 char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
224 unsigned maj;
225
226 truncate_nl(line);
227
228 if (type == 'c' && streq(line, "Character devices:")) {
229 good = true;
230 continue;
231 }
232
233 if (type == 'b' && streq(line, "Block devices:")) {
234 good = true;
235 continue;
236 }
237
238 if (isempty(line)) {
239 good = false;
240 continue;
241 }
242
243 if (!good)
244 continue;
245
246 p = strstrip(line);
247
248 w = strpbrk(p, WHITESPACE);
249 if (!w)
250 continue;
251 *w = 0;
252
253 r = safe_atou(p, &maj);
254 if (r < 0)
255 continue;
256 if (maj <= 0)
257 continue;
258
259 w++;
260 w += strspn(w, WHITESPACE);
e41969e3
LP
261
262 if (fnmatch(name, w, 0) != 0)
90060676
LP
263 continue;
264
265 sprintf(buf,
266 "%c %u:* %s",
267 type,
268 maj,
269 acc);
270
271 r = cg_set_attribute("devices", path, "devices.allow", buf);
1aeab12b
LP
272 if (r < 0)
273 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set devices.allow on %s: %s", path, strerror(-r));
90060676
LP
274 }
275
276 return 0;
277
278fail:
56f64d95 279 log_warning_errno(errno, "Failed to read /proc/devices: %m");
90060676
LP
280 return -errno;
281}
282
db785129 283void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path, ManagerState state) {
01efdf13 284 bool is_root;
4ad49000
LP
285 int r;
286
287 assert(c);
288 assert(path);
8e274523 289
4ad49000
LP
290 if (mask == 0)
291 return;
8e274523 292
01efdf13
LP
293 /* Some cgroup attributes are not support on the root cgroup,
294 * hence silently ignore */
295 is_root = isempty(path) || path_equal(path, "/");
296
297 if ((mask & CGROUP_CPU) && !is_root) {
b2f8b02e 298 char buf[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t)) + 1];
8e274523 299
db785129 300 sprintf(buf, "%lu\n",
d81afec1 301 IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != (unsigned long) -1 ? c->startup_cpu_shares :
db785129 302 c->cpu_shares != (unsigned long) -1 ? c->cpu_shares : 1024);
4ad49000 303 r = cg_set_attribute("cpu", path, "cpu.shares", buf);
1aeab12b
LP
304 if (r < 0)
305 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set cpu.shares on %s: %s", path, strerror(-r));
b2f8b02e 306
9a054909 307 sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
b2f8b02e 308 r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
1aeab12b
LP
309 if (r < 0)
310 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set cpu.cfs_period_us on %s: %s", path, strerror(-r));
b2f8b02e 311
3a43da28 312 if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
9a054909 313 sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
b2f8b02e
LP
314 r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
315 } else
316 r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
1aeab12b
LP
317 if (r < 0)
318 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set cpu.cfs_quota_us on %s: %s", path, strerror(-r));
4ad49000
LP
319 }
320
321 if (mask & CGROUP_BLKIO) {
322 char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
323 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
324 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
325 CGroupBlockIODeviceWeight *w;
326 CGroupBlockIODeviceBandwidth *b;
327
01efdf13 328 if (!is_root) {
d81afec1 329 sprintf(buf, "%lu\n", IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_blockio_weight != (unsigned long) -1 ? c->startup_blockio_weight :
db785129 330 c->blockio_weight != (unsigned long) -1 ? c->blockio_weight : 1000);
01efdf13 331 r = cg_set_attribute("blkio", path, "blkio.weight", buf);
1aeab12b
LP
332 if (r < 0)
333 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set blkio.weight on %s: %s", path, strerror(-r));
4ad49000 334
01efdf13
LP
335 /* FIXME: no way to reset this list */
336 LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
337 dev_t dev;
4ad49000 338
01efdf13
LP
339 r = lookup_blkio_device(w->path, &dev);
340 if (r < 0)
341 continue;
8e274523 342
01efdf13
LP
343 sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
344 r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
1aeab12b
LP
345 if (r < 0)
346 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
01efdf13 347 }
4ad49000
LP
348 }
349
350 /* FIXME: no way to reset this list */
351 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
352 const char *a;
353 dev_t dev;
354
355 r = lookup_blkio_device(b->path, &dev);
356 if (r < 0)
357 continue;
358
359 a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
360
361 sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
362 r = cg_set_attribute("blkio", path, a, buf);
1aeab12b
LP
363 if (r < 0)
364 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set %s on %s: %s", a, path, strerror(-r));
d686d8a9 365 }
8e274523
LP
366 }
367
4ad49000 368 if (mask & CGROUP_MEMORY) {
6a94f2e9 369 if (c->memory_limit != (uint64_t) -1) {
e58cec11
LP
370 char buf[DECIMAL_STR_MAX(uint64_t) + 1];
371
6a94f2e9
G
372 sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
373 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
374 } else
375 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
8e274523 376
1aeab12b
LP
377 if (r < 0)
378 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
4ad49000 379 }
8e274523 380
01efdf13 381 if ((mask & CGROUP_DEVICE) && !is_root) {
4ad49000 382 CGroupDeviceAllow *a;
8e274523 383
4ad49000
LP
384 if (c->device_allow || c->device_policy != CGROUP_AUTO)
385 r = cg_set_attribute("devices", path, "devices.deny", "a");
386 else
387 r = cg_set_attribute("devices", path, "devices.allow", "a");
1aeab12b
LP
388 if (r < 0)
389 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to reset devices.list on %s: %s", path, strerror(-r));
fb385181 390
4ad49000
LP
391 if (c->device_policy == CGROUP_CLOSED ||
392 (c->device_policy == CGROUP_AUTO && c->device_allow)) {
393 static const char auto_devices[] =
7d711efb
LP
394 "/dev/null\0" "rwm\0"
395 "/dev/zero\0" "rwm\0"
396 "/dev/full\0" "rwm\0"
397 "/dev/random\0" "rwm\0"
398 "/dev/urandom\0" "rwm\0"
399 "/dev/tty\0" "rwm\0"
400 "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
4ad49000
LP
401
402 const char *x, *y;
403
404 NULSTR_FOREACH_PAIR(x, y, auto_devices)
405 whitelist_device(path, x, y);
7d711efb
LP
406
407 whitelist_major(path, "pts", 'c', "rw");
408 whitelist_major(path, "kdbus", 'c', "rw");
409 whitelist_major(path, "kdbus/*", 'c', "rw");
4ad49000
LP
410 }
411
412 LIST_FOREACH(device_allow, a, c->device_allow) {
413 char acc[4];
414 unsigned k = 0;
415
416 if (a->r)
417 acc[k++] = 'r';
418 if (a->w)
419 acc[k++] = 'w';
420 if (a->m)
421 acc[k++] = 'm';
fb385181 422
4ad49000
LP
423 if (k == 0)
424 continue;
fb385181 425
4ad49000 426 acc[k++] = 0;
90060676
LP
427
428 if (startswith(a->path, "/dev/"))
429 whitelist_device(path, a->path, acc);
430 else if (startswith(a->path, "block-"))
431 whitelist_major(path, a->path + 6, 'b', acc);
432 else if (startswith(a->path, "char-"))
433 whitelist_major(path, a->path + 5, 'c', acc);
434 else
435 log_debug("Ignoring device %s while writing cgroup attribute.", a->path);
4ad49000
LP
436 }
437 }
fb385181
LP
438}
439
db785129 440CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
4ad49000 441 CGroupControllerMask mask = 0;
8e274523 442
4ad49000 443 /* Figure out which controllers we need */
8e274523 444
b2f8b02e 445 if (c->cpu_accounting ||
db785129
LP
446 c->cpu_shares != (unsigned long) -1 ||
447 c->startup_cpu_shares != (unsigned long) -1 ||
3a43da28 448 c->cpu_quota_per_sec_usec != USEC_INFINITY)
4ad49000 449 mask |= CGROUP_CPUACCT | CGROUP_CPU;
ecedd90f 450
4ad49000 451 if (c->blockio_accounting ||
db785129
LP
452 c->blockio_weight != (unsigned long) -1 ||
453 c->startup_blockio_weight != (unsigned long) -1 ||
4ad49000 454 c->blockio_device_weights ||
db785129 455 c->blockio_device_bandwidths)
4ad49000 456 mask |= CGROUP_BLKIO;
ecedd90f 457
4ad49000 458 if (c->memory_accounting ||
ddca82ac 459 c->memory_limit != (uint64_t) -1)
4ad49000 460 mask |= CGROUP_MEMORY;
8e274523 461
a931ad47
LP
462 if (c->device_allow ||
463 c->device_policy != CGROUP_AUTO)
4ad49000
LP
464 mask |= CGROUP_DEVICE;
465
466 return mask;
8e274523
LP
467}
468
bc432dc7 469CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
4ad49000 470 CGroupContext *c;
8e274523 471
4ad49000
LP
472 c = unit_get_cgroup_context(u);
473 if (!c)
474 return 0;
8e274523 475
a931ad47
LP
476 /* If delegation is turned on, then turn on all cgroups,
477 * unless the process we fork into it is known to drop
478 * privileges anyway, and shouldn't get access to the
479 * controllers anyway. */
480
481 if (c->delegate) {
482 ExecContext *e;
483
484 e = unit_get_exec_context(u);
485 if (!e || exec_context_maintains_privileges(e))
486 return _CGROUP_CONTROLLER_MASK_ALL;
487 }
488
db785129 489 return cgroup_context_get_mask(c);
8e274523
LP
490}
491
bc432dc7 492CGroupControllerMask unit_get_members_mask(Unit *u) {
4ad49000 493 assert(u);
bc432dc7
LP
494
495 if (u->cgroup_members_mask_valid)
496 return u->cgroup_members_mask;
497
498 u->cgroup_members_mask = 0;
499
500 if (u->type == UNIT_SLICE) {
501 Unit *member;
502 Iterator i;
503
504 SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {
505
506 if (member == u)
507 continue;
508
d4fdc205 509 if (UNIT_DEREF(member->slice) != u)
bc432dc7
LP
510 continue;
511
512 u->cgroup_members_mask |=
513 unit_get_cgroup_mask(member) |
514 unit_get_members_mask(member);
515 }
516 }
517
518 u->cgroup_members_mask_valid = true;
6414b7c9 519 return u->cgroup_members_mask;
246aa6dd
LP
520}
521
bc432dc7 522CGroupControllerMask unit_get_siblings_mask(Unit *u) {
4ad49000 523 assert(u);
246aa6dd 524
bc432dc7 525 if (UNIT_ISSET(u->slice))
637f421e 526 return unit_get_members_mask(UNIT_DEREF(u->slice));
4ad49000 527
637f421e 528 return unit_get_cgroup_mask(u) | unit_get_members_mask(u);
246aa6dd
LP
529}
530
bc432dc7 531CGroupControllerMask unit_get_target_mask(Unit *u) {
6414b7c9
DS
532 CGroupControllerMask mask;
533
534 mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
535 mask &= u->manager->cgroup_supported;
536
537 return mask;
538}
539
540/* Recurse from a unit up through its containing slices, propagating
541 * mask bits upward. A unit is also member of itself. */
bc432dc7
LP
542void unit_update_cgroup_members_masks(Unit *u) {
543 CGroupControllerMask m;
544 bool more;
545
546 assert(u);
547
548 /* Calculate subtree mask */
549 m = unit_get_cgroup_mask(u) | unit_get_members_mask(u);
550
551 /* See if anything changed from the previous invocation. If
552 * not, we're done. */
553 if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask)
554 return;
555
556 more =
557 u->cgroup_subtree_mask_valid &&
558 ((m & ~u->cgroup_subtree_mask) != 0) &&
559 ((~m & u->cgroup_subtree_mask) == 0);
560
561 u->cgroup_subtree_mask = m;
562 u->cgroup_subtree_mask_valid = true;
563
6414b7c9
DS
564 if (UNIT_ISSET(u->slice)) {
565 Unit *s = UNIT_DEREF(u->slice);
bc432dc7
LP
566
567 if (more)
568 /* There's more set now than before. We
569 * propagate the new mask to the parent's mask
570 * (not caring if it actually was valid or
571 * not). */
572
573 s->cgroup_members_mask |= m;
574
575 else
576 /* There's less set now than before (or we
577 * don't know), we need to recalculate
578 * everything, so let's invalidate the
579 * parent's members mask */
580
581 s->cgroup_members_mask_valid = false;
582
583 /* And now make sure that this change also hits our
584 * grandparents */
585 unit_update_cgroup_members_masks(s);
6414b7c9
DS
586 }
587}
588
03b90d4b
LP
589static const char *migrate_callback(CGroupControllerMask mask, void *userdata) {
590 Unit *u = userdata;
591
592 assert(mask != 0);
593 assert(u);
594
595 while (u) {
596 if (u->cgroup_path &&
597 u->cgroup_realized &&
598 (u->cgroup_realized_mask & mask) == mask)
599 return u->cgroup_path;
600
601 u = UNIT_DEREF(u->slice);
602 }
603
604 return NULL;
605}
606
4ad49000 607static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
0cd385d3 608 CGroupContext *c;
bc432dc7 609 int r;
64747e2d 610
4ad49000 611 assert(u);
64747e2d 612
0cd385d3
LP
613 c = unit_get_cgroup_context(u);
614 if (!c)
615 return 0;
616
7b3fd631
LP
617 if (!u->cgroup_path) {
618 _cleanup_free_ char *path = NULL;
64747e2d 619
7b3fd631
LP
620 path = unit_default_cgroup_path(u);
621 if (!path)
622 return log_oom();
623
624 r = hashmap_put(u->manager->cgroup_unit, path, u);
625 if (r < 0) {
626 log_error(r == -EEXIST ? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path, strerror(-r));
627 return r;
628 }
629 if (r > 0) {
630 u->cgroup_path = path;
631 path = NULL;
632 }
b58b8e11
HH
633 }
634
03b90d4b
LP
635 /* First, create our own group */
636 r = cg_create_everywhere(u->manager->cgroup_supported, mask, u->cgroup_path);
23bbb0de
MS
637 if (r < 0)
638 return log_error_errno(r, "Failed to create cgroup %s: %m", u->cgroup_path);
03b90d4b
LP
639
640 /* Keep track that this is now realized */
4ad49000 641 u->cgroup_realized = true;
bc432dc7 642 u->cgroup_realized_mask = mask;
4ad49000 643
0cd385d3
LP
644 if (u->type != UNIT_SLICE && !c->delegate) {
645
646 /* Then, possibly move things over, but not if
647 * subgroups may contain processes, which is the case
648 * for slice and delegation units. */
649 r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);
650 if (r < 0)
651 log_warning_errno(r, "Failed to migrate cgroup from to %s: %m", u->cgroup_path);
652 }
03b90d4b 653
64747e2d
LP
654 return 0;
655}
656
7b3fd631
LP
657int unit_attach_pids_to_cgroup(Unit *u) {
658 int r;
659 assert(u);
660
661 r = unit_realize_cgroup(u);
662 if (r < 0)
663 return r;
664
665 r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->pids, migrate_callback, u);
666 if (r < 0)
667 return r;
668
669 return 0;
670}
671
6414b7c9 672static bool unit_has_mask_realized(Unit *u, CGroupControllerMask mask) {
bc432dc7
LP
673 assert(u);
674
675 return u->cgroup_realized && u->cgroup_realized_mask == mask;
6414b7c9
DS
676}
677
678/* Check if necessary controllers and attributes for a unit are in place.
679 *
680 * If so, do nothing.
681 * If not, create paths, move processes over, and set attributes.
682 *
683 * Returns 0 on success and < 0 on failure. */
db785129 684static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
4ad49000 685 CGroupControllerMask mask;
6414b7c9 686 int r;
64747e2d 687
4ad49000 688 assert(u);
64747e2d 689
4ad49000 690 if (u->in_cgroup_queue) {
71fda00f 691 LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
692 u->in_cgroup_queue = false;
693 }
64747e2d 694
6414b7c9 695 mask = unit_get_target_mask(u);
64747e2d 696
6414b7c9 697 if (unit_has_mask_realized(u, mask))
0a1eb06d 698 return 0;
64747e2d 699
4ad49000 700 /* First, realize parents */
6414b7c9 701 if (UNIT_ISSET(u->slice)) {
db785129 702 r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
6414b7c9
DS
703 if (r < 0)
704 return r;
705 }
4ad49000
LP
706
707 /* And then do the real work */
6414b7c9
DS
708 r = unit_create_cgroups(u, mask);
709 if (r < 0)
710 return r;
711
712 /* Finally, apply the necessary attributes. */
db785129 713 cgroup_context_apply(unit_get_cgroup_context(u), mask, u->cgroup_path, state);
6414b7c9
DS
714
715 return 0;
64747e2d
LP
716}
717
4ad49000 718static void unit_add_to_cgroup_queue(Unit *u) {
ecedd90f 719
4ad49000
LP
720 if (u->in_cgroup_queue)
721 return;
8e274523 722
71fda00f 723 LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
724 u->in_cgroup_queue = true;
725}
8c6db833 726
4ad49000 727unsigned manager_dispatch_cgroup_queue(Manager *m) {
db785129 728 ManagerState state;
4ad49000 729 unsigned n = 0;
db785129 730 Unit *i;
6414b7c9 731 int r;
ecedd90f 732
db785129
LP
733 state = manager_state(m);
734
4ad49000
LP
735 while ((i = m->cgroup_queue)) {
736 assert(i->in_cgroup_queue);
ecedd90f 737
db785129 738 r = unit_realize_cgroup_now(i, state);
6414b7c9 739 if (r < 0)
da927ba9 740 log_warning_errno(r, "Failed to realize cgroups for queued unit %s: %m", i->id);
0a1eb06d 741
4ad49000
LP
742 n++;
743 }
ecedd90f 744
4ad49000 745 return n;
8e274523
LP
746}
747
4ad49000
LP
748static void unit_queue_siblings(Unit *u) {
749 Unit *slice;
ca949c9d 750
4ad49000
LP
751 /* This adds the siblings of the specified unit and the
752 * siblings of all parent units to the cgroup queue. (But
753 * neither the specified unit itself nor the parents.) */
754
755 while ((slice = UNIT_DEREF(u->slice))) {
756 Iterator i;
757 Unit *m;
8f53a7b8 758
4ad49000
LP
759 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
760 if (m == u)
761 continue;
8e274523 762
6414b7c9
DS
763 /* Skip units that have a dependency on the slice
764 * but aren't actually in it. */
4ad49000 765 if (UNIT_DEREF(m->slice) != slice)
50159e6a 766 continue;
8e274523 767
6414b7c9
DS
768 /* No point in doing cgroup application for units
769 * without active processes. */
770 if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
771 continue;
772
773 /* If the unit doesn't need any new controllers
774 * and has current ones realized, it doesn't need
775 * any changes. */
776 if (unit_has_mask_realized(m, unit_get_target_mask(m)))
777 continue;
778
4ad49000 779 unit_add_to_cgroup_queue(m);
50159e6a
LP
780 }
781
4ad49000 782 u = slice;
8e274523 783 }
4ad49000
LP
784}
785
0a1eb06d 786int unit_realize_cgroup(Unit *u) {
4ad49000
LP
787 CGroupContext *c;
788
789 assert(u);
790
791 c = unit_get_cgroup_context(u);
792 if (!c)
0a1eb06d 793 return 0;
8e274523 794
4ad49000
LP
795 /* So, here's the deal: when realizing the cgroups for this
796 * unit, we need to first create all parents, but there's more
797 * actually: for the weight-based controllers we also need to
798 * make sure that all our siblings (i.e. units that are in the
73e231ab 799 * same slice as we are) have cgroups, too. Otherwise, things
4ad49000
LP
800 * would become very uneven as each of their processes would
801 * get as much resources as all our group together. This call
802 * will synchronously create the parent cgroups, but will
803 * defer work on the siblings to the next event loop
804 * iteration. */
ca949c9d 805
4ad49000
LP
806 /* Add all sibling slices to the cgroup queue. */
807 unit_queue_siblings(u);
808
6414b7c9 809 /* And realize this one now (and apply the values) */
db785129 810 return unit_realize_cgroup_now(u, manager_state(u->manager));
8e274523
LP
811}
812
b1491eba 813void unit_destroy_cgroup_if_empty(Unit *u) {
8e274523
LP
814 int r;
815
4ad49000 816 assert(u);
8e274523 817
4ad49000
LP
818 if (!u->cgroup_path)
819 return;
8e274523 820
13b84ec7 821 r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE));
dab5bf85 822 if (r < 0) {
da927ba9 823 log_debug_errno(r, "Failed to destroy cgroup %s: %m", u->cgroup_path);
dab5bf85
RL
824 return;
825 }
8e274523 826
0a1eb06d
LP
827 hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
828
4ad49000
LP
829 free(u->cgroup_path);
830 u->cgroup_path = NULL;
831 u->cgroup_realized = false;
bc432dc7 832 u->cgroup_realized_mask = 0;
8e274523
LP
833}
834
4ad49000
LP
835pid_t unit_search_main_pid(Unit *u) {
836 _cleanup_fclose_ FILE *f = NULL;
837 pid_t pid = 0, npid, mypid;
838
839 assert(u);
840
841 if (!u->cgroup_path)
842 return 0;
843
844 if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
845 return 0;
846
847 mypid = getpid();
848 while (cg_read_pid(f, &npid) > 0) {
849 pid_t ppid;
850
851 if (npid == pid)
852 continue;
8e274523 853
4ad49000
LP
854 /* Ignore processes that aren't our kids */
855 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
856 continue;
8e274523 857
4ad49000
LP
858 if (pid != 0) {
859 /* Dang, there's more than one daemonized PID
860 in this group, so we don't know what process
861 is the main process. */
862 pid = 0;
863 break;
864 }
8e274523 865
4ad49000 866 pid = npid;
8e274523
LP
867 }
868
4ad49000 869 return pid;
8e274523
LP
870}
871
8e274523 872int manager_setup_cgroup(Manager *m) {
9444b1f2 873 _cleanup_free_ char *path = NULL;
8e274523 874 int r;
8e274523
LP
875
876 assert(m);
877
35d2e7ec 878 /* 1. Determine hierarchy */
9444b1f2
LP
879 free(m->cgroup_root);
880 m->cgroup_root = NULL;
881
882 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
23bbb0de
MS
883 if (r < 0)
884 return log_error_errno(r, "Cannot determine cgroup we are running in: %m");
8e274523 885
15c60e99
LP
886 /* LEGACY: Already in /system.slice? If so, let's cut this
887 * off. This is to support live upgrades from older systemd
888 * versions where PID 1 was moved there. */
9444b1f2 889 if (m->running_as == SYSTEMD_SYSTEM) {
0d8c31ff
ZJS
890 char *e;
891
9444b1f2 892 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
15c60e99
LP
893 if (!e)
894 e = endswith(m->cgroup_root, "/system");
9444b1f2
LP
895 if (e)
896 *e = 0;
0baf24dd 897 }
7ccfb64a 898
9444b1f2
LP
899 /* And make sure to store away the root value without trailing
900 * slash, even for the root dir, so that we can easily prepend
901 * it everywhere. */
902 if (streq(m->cgroup_root, "/"))
903 m->cgroup_root[0] = 0;
8e274523 904
35d2e7ec 905 /* 2. Show data */
9444b1f2 906 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
23bbb0de
MS
907 if (r < 0)
908 return log_error_errno(r, "Cannot find cgroup mount point: %m");
8e274523 909
c6c18be3 910 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
0d8c31ff 911 if (!m->test_run) {
c6c18be3 912
0d8c31ff
ZJS
913 /* 3. Install agent */
914 if (m->running_as == SYSTEMD_SYSTEM) {
915 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
916 if (r < 0)
da927ba9 917 log_warning_errno(r, "Failed to install release agent, ignoring: %m");
0d8c31ff
ZJS
918 else if (r > 0)
919 log_debug("Installed release agent.");
920 else
921 log_debug("Release agent already installed.");
922 }
8e274523 923
0d8c31ff
ZJS
924 /* 4. Make sure we are in the root cgroup */
925 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
23bbb0de
MS
926 if (r < 0)
927 return log_error_errno(r, "Failed to create root cgroup hierarchy: %m");
c6c18be3 928
0d8c31ff
ZJS
929 /* 5. And pin it, so that it cannot be unmounted */
930 safe_close(m->pin_cgroupfs_fd);
c6c18be3 931
0d8c31ff 932 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
4a62c710
MS
933 if (m->pin_cgroupfs_fd < 0)
934 return log_error_errno(errno, "Failed to open pin file: %m");
0d8c31ff
ZJS
935
936 /* 6. Always enable hierarchial support if it exists... */
937 cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
c6c18be3
LP
938 }
939
0d8c31ff 940 /* 7. Figure out which controllers are supported */
4ad49000 941 m->cgroup_supported = cg_mask_supported();
9156e799 942
a32360f1 943 return 0;
8e274523
LP
944}
945
c6c18be3 946void manager_shutdown_cgroup(Manager *m, bool delete) {
8e274523
LP
947 assert(m);
948
9444b1f2
LP
949 /* We can't really delete the group, since we are in it. But
950 * let's trim it. */
951 if (delete && m->cgroup_root)
952 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
8e274523 953
03e334a1 954 m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd);
c6c18be3 955
9444b1f2
LP
956 free(m->cgroup_root);
957 m->cgroup_root = NULL;
8e274523
LP
958}
959
4ad49000 960Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
acb14d31 961 char *p;
4ad49000 962 Unit *u;
acb14d31
LP
963
964 assert(m);
965 assert(cgroup);
acb14d31 966
4ad49000
LP
967 u = hashmap_get(m->cgroup_unit, cgroup);
968 if (u)
969 return u;
acb14d31 970
8e70580b 971 p = strdupa(cgroup);
acb14d31
LP
972 for (;;) {
973 char *e;
974
975 e = strrchr(p, '/');
4ad49000
LP
976 if (e == p || !e)
977 return NULL;
acb14d31
LP
978
979 *e = 0;
980
4ad49000
LP
981 u = hashmap_get(m->cgroup_unit, p);
982 if (u)
983 return u;
acb14d31
LP
984 }
985}
986
4ad49000
LP
987Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
988 _cleanup_free_ char *cgroup = NULL;
acb14d31 989 int r;
8e274523 990
8c47c732
LP
991 assert(m);
992
993 if (pid <= 1)
994 return NULL;
995
4ad49000
LP
996 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
997 if (r < 0)
6dde1f33
LP
998 return NULL;
999
4ad49000 1000 return manager_get_unit_by_cgroup(m, cgroup);
6dde1f33 1001}
4fbf50b3 1002
4ad49000
LP
1003int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
1004 Unit *u;
1005 int r;
4fbf50b3 1006
4ad49000
LP
1007 assert(m);
1008 assert(cgroup);
4fbf50b3 1009
4ad49000 1010 u = manager_get_unit_by_cgroup(m, cgroup);
b56c28c3 1011 if (u) {
06025d91
LP
1012 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
1013 if (r > 0) {
1014 if (UNIT_VTABLE(u)->notify_cgroup_empty)
1015 UNIT_VTABLE(u)->notify_cgroup_empty(u);
b56c28c3 1016
06025d91
LP
1017 unit_add_to_gc_queue(u);
1018 }
b56c28c3 1019 }
2633eb83 1020
4ad49000 1021 return 0;
4fbf50b3
LP
1022}
1023
4ad49000
LP
1024static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
1025 [CGROUP_AUTO] = "auto",
1026 [CGROUP_CLOSED] = "closed",
1027 [CGROUP_STRICT] = "strict",
1028};
4fbf50b3 1029
4ad49000 1030DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);