]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/cgroup.c
treewide: no need to negate errno for log_*_errno()
[thirdparty/systemd.git] / src / core / cgroup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
4ad49000 6 Copyright 2013 Lennart Poettering
8e274523
LP
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
c6c18be3 22#include <fcntl.h>
e41969e3 23#include <fnmatch.h>
8c6db833 24
9eb977db 25#include "path-util.h"
9444b1f2 26#include "special.h"
4ad49000
LP
27#include "cgroup-util.h"
28#include "cgroup.h"
8e274523 29
9a054909
LP
/* CFS period written to cpu.cfs_period_us; the configured per-second CPU
 * quota is scaled to this period before being written to cpu.cfs_quota_us. */
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
31
4ad49000
LP
32void cgroup_context_init(CGroupContext *c) {
33 assert(c);
34
35 /* Initialize everything to the kernel defaults, assuming the
36 * structure is preinitialized to 0 */
37
db785129
LP
38 c->cpu_shares = (unsigned long) -1;
39 c->startup_cpu_shares = (unsigned long) -1;
ddca82ac 40 c->memory_limit = (uint64_t) -1;
db785129
LP
41 c->blockio_weight = (unsigned long) -1;
42 c->startup_blockio_weight = (unsigned long) -1;
b2f8b02e 43
3a43da28 44 c->cpu_quota_per_sec_usec = USEC_INFINITY;
4ad49000 45}
8e274523 46
4ad49000
LP
47void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
48 assert(c);
49 assert(a);
50
71fda00f 51 LIST_REMOVE(device_allow, c->device_allow, a);
4ad49000
LP
52 free(a->path);
53 free(a);
54}
55
56void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
57 assert(c);
58 assert(w);
59
71fda00f 60 LIST_REMOVE(device_weights, c->blockio_device_weights, w);
4ad49000
LP
61 free(w->path);
62 free(w);
63}
64
65void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
66 assert(c);
8e274523 67 assert(b);
8e274523 68
71fda00f 69 LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
4ad49000
LP
70 free(b->path);
71 free(b);
72}
73
74void cgroup_context_done(CGroupContext *c) {
75 assert(c);
76
77 while (c->blockio_device_weights)
78 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
79
80 while (c->blockio_device_bandwidths)
81 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
82
83 while (c->device_allow)
84 cgroup_context_free_device_allow(c, c->device_allow);
85}
86
87void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
88 CGroupBlockIODeviceBandwidth *b;
89 CGroupBlockIODeviceWeight *w;
90 CGroupDeviceAllow *a;
9a054909 91 char u[FORMAT_TIMESPAN_MAX];
4ad49000
LP
92
93 assert(c);
94 assert(f);
95
96 prefix = strempty(prefix);
97
98 fprintf(f,
99 "%sCPUAccounting=%s\n"
100 "%sBlockIOAccounting=%s\n"
101 "%sMemoryAccounting=%s\n"
102 "%sCPUShares=%lu\n"
95ae05c0 103 "%sStartupCPUShares=%lu\n"
b2f8b02e 104 "%sCPUQuotaPerSecSec=%s\n"
112a7f46 105 "%sBlockIOWeight=%lu\n"
95ae05c0 106 "%sStartupBlockIOWeight=%lu\n"
4ad49000 107 "%sMemoryLimit=%" PRIu64 "\n"
a931ad47
LP
108 "%sDevicePolicy=%s\n"
109 "%sDelegate=%s\n",
4ad49000
LP
110 prefix, yes_no(c->cpu_accounting),
111 prefix, yes_no(c->blockio_accounting),
112 prefix, yes_no(c->memory_accounting),
113 prefix, c->cpu_shares,
95ae05c0 114 prefix, c->startup_cpu_shares,
b1d6dcf5 115 prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
4ad49000 116 prefix, c->blockio_weight,
95ae05c0 117 prefix, c->startup_blockio_weight,
4ad49000 118 prefix, c->memory_limit,
a931ad47
LP
119 prefix, cgroup_device_policy_to_string(c->device_policy),
120 prefix, yes_no(c->delegate));
4ad49000
LP
121
122 LIST_FOREACH(device_allow, a, c->device_allow)
123 fprintf(f,
124 "%sDeviceAllow=%s %s%s%s\n",
125 prefix,
126 a->path,
127 a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
128
129 LIST_FOREACH(device_weights, w, c->blockio_device_weights)
130 fprintf(f,
8e7076ca 131 "%sBlockIODeviceWeight=%s %lu",
4ad49000
LP
132 prefix,
133 w->path,
134 w->weight);
135
136 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
137 char buf[FORMAT_BYTES_MAX];
138
139 fprintf(f,
140 "%s%s=%s %s\n",
141 prefix,
142 b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
143 b->path,
144 format_bytes(buf, sizeof(buf), b->bandwidth));
145 }
146}
147
/* Determines the block device number to use for path p and stores it in *dev.
 *
 * If p is a block device node, its own device number is used. Otherwise the
 * device the file is stored on is used, provided it has a non-zero major.
 *
 * Returns 0 on success, -errno if stat() fails, and -ENODEV if no block
 * device could be derived. */
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;

        assert(p);
        assert(dev);

        if (stat(p, &st) < 0) {
                /* Capture the error before logging, so that the value
                 * we return cannot be altered by the logging call. */
                int saved_errno = errno;

                log_warning("Couldn't stat device %s: %m", p);
                return -saved_errno;
        }

        if (S_ISBLK(st.st_mode))
                *dev = st.st_rdev;
        else if (major(st.st_dev) != 0) {
                /* If this is not a device node then find the block
                 * device this file is stored on */
                *dev = st.st_dev;

                /* If this is a partition, try to get the originating
                 * block device. The result is deliberately ignored:
                 * this is best-effort (presumably *dev is left as-is
                 * on failure -- TODO confirm). */
                block_get_whole_disk(*dev, dev);
        } else {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        return 0;
}
178
4ad49000
LP
179static int whitelist_device(const char *path, const char *node, const char *acc) {
180 char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
181 struct stat st;
8c6db833 182 int r;
8e274523 183
4ad49000
LP
184 assert(path);
185 assert(acc);
8e274523 186
4ad49000
LP
187 if (stat(node, &st) < 0) {
188 log_warning("Couldn't stat device %s", node);
189 return -errno;
190 }
191
192 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
193 log_warning("%s is not a device.", node);
194 return -ENODEV;
195 }
196
197 sprintf(buf,
198 "%c %u:%u %s",
199 S_ISCHR(st.st_mode) ? 'c' : 'b',
200 major(st.st_rdev), minor(st.st_rdev),
201 acc);
202
203 r = cg_set_attribute("devices", path, "devices.allow", buf);
1aeab12b
LP
204 if (r < 0)
205 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set devices.allow on %s: %s", path, strerror(-r));
4ad49000
LP
206
207 return r;
8e274523
LP
208}
209
90060676
LP
210static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
211 _cleanup_fclose_ FILE *f = NULL;
212 char line[LINE_MAX];
213 bool good = false;
214 int r;
215
216 assert(path);
217 assert(acc);
218 assert(type == 'b' || type == 'c');
219
220 f = fopen("/proc/devices", "re");
221 if (!f) {
222 log_warning("Cannot open /proc/devices to resolve %s (%c): %m", name, type);
223 return -errno;
224 }
225
226 FOREACH_LINE(line, f, goto fail) {
227 char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
228 unsigned maj;
229
230 truncate_nl(line);
231
232 if (type == 'c' && streq(line, "Character devices:")) {
233 good = true;
234 continue;
235 }
236
237 if (type == 'b' && streq(line, "Block devices:")) {
238 good = true;
239 continue;
240 }
241
242 if (isempty(line)) {
243 good = false;
244 continue;
245 }
246
247 if (!good)
248 continue;
249
250 p = strstrip(line);
251
252 w = strpbrk(p, WHITESPACE);
253 if (!w)
254 continue;
255 *w = 0;
256
257 r = safe_atou(p, &maj);
258 if (r < 0)
259 continue;
260 if (maj <= 0)
261 continue;
262
263 w++;
264 w += strspn(w, WHITESPACE);
e41969e3
LP
265
266 if (fnmatch(name, w, 0) != 0)
90060676
LP
267 continue;
268
269 sprintf(buf,
270 "%c %u:* %s",
271 type,
272 maj,
273 acc);
274
275 r = cg_set_attribute("devices", path, "devices.allow", buf);
1aeab12b
LP
276 if (r < 0)
277 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set devices.allow on %s: %s", path, strerror(-r));
90060676
LP
278 }
279
280 return 0;
281
282fail:
283 log_warning("Failed to read /proc/devices: %m");
284 return -errno;
285}
286
db785129 287void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path, ManagerState state) {
01efdf13 288 bool is_root;
4ad49000
LP
289 int r;
290
291 assert(c);
292 assert(path);
8e274523 293
4ad49000
LP
294 if (mask == 0)
295 return;
8e274523 296
01efdf13
LP
297 /* Some cgroup attributes are not support on the root cgroup,
298 * hence silently ignore */
299 is_root = isempty(path) || path_equal(path, "/");
300
301 if ((mask & CGROUP_CPU) && !is_root) {
b2f8b02e 302 char buf[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t)) + 1];
8e274523 303
db785129 304 sprintf(buf, "%lu\n",
d81afec1 305 IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != (unsigned long) -1 ? c->startup_cpu_shares :
db785129 306 c->cpu_shares != (unsigned long) -1 ? c->cpu_shares : 1024);
4ad49000 307 r = cg_set_attribute("cpu", path, "cpu.shares", buf);
1aeab12b
LP
308 if (r < 0)
309 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set cpu.shares on %s: %s", path, strerror(-r));
b2f8b02e 310
9a054909 311 sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
b2f8b02e 312 r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
1aeab12b
LP
313 if (r < 0)
314 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set cpu.cfs_period_us on %s: %s", path, strerror(-r));
b2f8b02e 315
3a43da28 316 if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
9a054909 317 sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
b2f8b02e
LP
318 r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
319 } else
320 r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
1aeab12b
LP
321 if (r < 0)
322 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set cpu.cfs_quota_us on %s: %s", path, strerror(-r));
4ad49000
LP
323 }
324
325 if (mask & CGROUP_BLKIO) {
326 char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
327 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
328 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
329 CGroupBlockIODeviceWeight *w;
330 CGroupBlockIODeviceBandwidth *b;
331
01efdf13 332 if (!is_root) {
d81afec1 333 sprintf(buf, "%lu\n", IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_blockio_weight != (unsigned long) -1 ? c->startup_blockio_weight :
db785129 334 c->blockio_weight != (unsigned long) -1 ? c->blockio_weight : 1000);
01efdf13 335 r = cg_set_attribute("blkio", path, "blkio.weight", buf);
1aeab12b
LP
336 if (r < 0)
337 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set blkio.weight on %s: %s", path, strerror(-r));
4ad49000 338
01efdf13
LP
339 /* FIXME: no way to reset this list */
340 LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
341 dev_t dev;
4ad49000 342
01efdf13
LP
343 r = lookup_blkio_device(w->path, &dev);
344 if (r < 0)
345 continue;
8e274523 346
01efdf13
LP
347 sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
348 r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
1aeab12b
LP
349 if (r < 0)
350 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
01efdf13 351 }
4ad49000
LP
352 }
353
354 /* FIXME: no way to reset this list */
355 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
356 const char *a;
357 dev_t dev;
358
359 r = lookup_blkio_device(b->path, &dev);
360 if (r < 0)
361 continue;
362
363 a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
364
365 sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
366 r = cg_set_attribute("blkio", path, a, buf);
1aeab12b
LP
367 if (r < 0)
368 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set %s on %s: %s", a, path, strerror(-r));
d686d8a9 369 }
8e274523
LP
370 }
371
4ad49000 372 if (mask & CGROUP_MEMORY) {
6a94f2e9 373 if (c->memory_limit != (uint64_t) -1) {
e58cec11
LP
374 char buf[DECIMAL_STR_MAX(uint64_t) + 1];
375
6a94f2e9
G
376 sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
377 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
378 } else
379 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
8e274523 380
1aeab12b
LP
381 if (r < 0)
382 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
4ad49000 383 }
8e274523 384
01efdf13 385 if ((mask & CGROUP_DEVICE) && !is_root) {
4ad49000 386 CGroupDeviceAllow *a;
8e274523 387
4ad49000
LP
388 if (c->device_allow || c->device_policy != CGROUP_AUTO)
389 r = cg_set_attribute("devices", path, "devices.deny", "a");
390 else
391 r = cg_set_attribute("devices", path, "devices.allow", "a");
1aeab12b
LP
392 if (r < 0)
393 log_full(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, "Failed to reset devices.list on %s: %s", path, strerror(-r));
fb385181 394
4ad49000
LP
395 if (c->device_policy == CGROUP_CLOSED ||
396 (c->device_policy == CGROUP_AUTO && c->device_allow)) {
397 static const char auto_devices[] =
7d711efb
LP
398 "/dev/null\0" "rwm\0"
399 "/dev/zero\0" "rwm\0"
400 "/dev/full\0" "rwm\0"
401 "/dev/random\0" "rwm\0"
402 "/dev/urandom\0" "rwm\0"
403 "/dev/tty\0" "rwm\0"
404 "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
4ad49000
LP
405
406 const char *x, *y;
407
408 NULSTR_FOREACH_PAIR(x, y, auto_devices)
409 whitelist_device(path, x, y);
7d711efb
LP
410
411 whitelist_major(path, "pts", 'c', "rw");
412 whitelist_major(path, "kdbus", 'c', "rw");
413 whitelist_major(path, "kdbus/*", 'c', "rw");
4ad49000
LP
414 }
415
416 LIST_FOREACH(device_allow, a, c->device_allow) {
417 char acc[4];
418 unsigned k = 0;
419
420 if (a->r)
421 acc[k++] = 'r';
422 if (a->w)
423 acc[k++] = 'w';
424 if (a->m)
425 acc[k++] = 'm';
fb385181 426
4ad49000
LP
427 if (k == 0)
428 continue;
fb385181 429
4ad49000 430 acc[k++] = 0;
90060676
LP
431
432 if (startswith(a->path, "/dev/"))
433 whitelist_device(path, a->path, acc);
434 else if (startswith(a->path, "block-"))
435 whitelist_major(path, a->path + 6, 'b', acc);
436 else if (startswith(a->path, "char-"))
437 whitelist_major(path, a->path + 5, 'c', acc);
438 else
439 log_debug("Ignoring device %s while writing cgroup attribute.", a->path);
4ad49000
LP
440 }
441 }
fb385181
LP
442}
443
db785129 444CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
4ad49000 445 CGroupControllerMask mask = 0;
8e274523 446
4ad49000 447 /* Figure out which controllers we need */
8e274523 448
b2f8b02e 449 if (c->cpu_accounting ||
db785129
LP
450 c->cpu_shares != (unsigned long) -1 ||
451 c->startup_cpu_shares != (unsigned long) -1 ||
3a43da28 452 c->cpu_quota_per_sec_usec != USEC_INFINITY)
4ad49000 453 mask |= CGROUP_CPUACCT | CGROUP_CPU;
ecedd90f 454
4ad49000 455 if (c->blockio_accounting ||
db785129
LP
456 c->blockio_weight != (unsigned long) -1 ||
457 c->startup_blockio_weight != (unsigned long) -1 ||
4ad49000 458 c->blockio_device_weights ||
db785129 459 c->blockio_device_bandwidths)
4ad49000 460 mask |= CGROUP_BLKIO;
ecedd90f 461
4ad49000 462 if (c->memory_accounting ||
ddca82ac 463 c->memory_limit != (uint64_t) -1)
4ad49000 464 mask |= CGROUP_MEMORY;
8e274523 465
a931ad47
LP
466 if (c->device_allow ||
467 c->device_policy != CGROUP_AUTO)
4ad49000
LP
468 mask |= CGROUP_DEVICE;
469
470 return mask;
8e274523
LP
471}
472
bc432dc7 473CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
4ad49000 474 CGroupContext *c;
8e274523 475
4ad49000
LP
476 c = unit_get_cgroup_context(u);
477 if (!c)
478 return 0;
8e274523 479
a931ad47
LP
480 /* If delegation is turned on, then turn on all cgroups,
481 * unless the process we fork into it is known to drop
482 * privileges anyway, and shouldn't get access to the
483 * controllers anyway. */
484
485 if (c->delegate) {
486 ExecContext *e;
487
488 e = unit_get_exec_context(u);
489 if (!e || exec_context_maintains_privileges(e))
490 return _CGROUP_CONTROLLER_MASK_ALL;
491 }
492
db785129 493 return cgroup_context_get_mask(c);
8e274523
LP
494}
495
bc432dc7 496CGroupControllerMask unit_get_members_mask(Unit *u) {
4ad49000 497 assert(u);
bc432dc7
LP
498
499 if (u->cgroup_members_mask_valid)
500 return u->cgroup_members_mask;
501
502 u->cgroup_members_mask = 0;
503
504 if (u->type == UNIT_SLICE) {
505 Unit *member;
506 Iterator i;
507
508 SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {
509
510 if (member == u)
511 continue;
512
d4fdc205 513 if (UNIT_DEREF(member->slice) != u)
bc432dc7
LP
514 continue;
515
516 u->cgroup_members_mask |=
517 unit_get_cgroup_mask(member) |
518 unit_get_members_mask(member);
519 }
520 }
521
522 u->cgroup_members_mask_valid = true;
6414b7c9 523 return u->cgroup_members_mask;
246aa6dd
LP
524}
525
bc432dc7 526CGroupControllerMask unit_get_siblings_mask(Unit *u) {
4ad49000 527 assert(u);
246aa6dd 528
bc432dc7 529 if (UNIT_ISSET(u->slice))
637f421e 530 return unit_get_members_mask(UNIT_DEREF(u->slice));
4ad49000 531
637f421e 532 return unit_get_cgroup_mask(u) | unit_get_members_mask(u);
246aa6dd
LP
533}
534
bc432dc7 535CGroupControllerMask unit_get_target_mask(Unit *u) {
6414b7c9
DS
536 CGroupControllerMask mask;
537
538 mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
539 mask &= u->manager->cgroup_supported;
540
541 return mask;
542}
543
544/* Recurse from a unit up through its containing slices, propagating
545 * mask bits upward. A unit is also member of itself. */
bc432dc7
LP
546void unit_update_cgroup_members_masks(Unit *u) {
547 CGroupControllerMask m;
548 bool more;
549
550 assert(u);
551
552 /* Calculate subtree mask */
553 m = unit_get_cgroup_mask(u) | unit_get_members_mask(u);
554
555 /* See if anything changed from the previous invocation. If
556 * not, we're done. */
557 if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask)
558 return;
559
560 more =
561 u->cgroup_subtree_mask_valid &&
562 ((m & ~u->cgroup_subtree_mask) != 0) &&
563 ((~m & u->cgroup_subtree_mask) == 0);
564
565 u->cgroup_subtree_mask = m;
566 u->cgroup_subtree_mask_valid = true;
567
6414b7c9
DS
568 if (UNIT_ISSET(u->slice)) {
569 Unit *s = UNIT_DEREF(u->slice);
bc432dc7
LP
570
571 if (more)
572 /* There's more set now than before. We
573 * propagate the new mask to the parent's mask
574 * (not caring if it actually was valid or
575 * not). */
576
577 s->cgroup_members_mask |= m;
578
579 else
580 /* There's less set now than before (or we
581 * don't know), we need to recalculate
582 * everything, so let's invalidate the
583 * parent's members mask */
584
585 s->cgroup_members_mask_valid = false;
586
587 /* And now make sure that this change also hits our
588 * grandparents */
589 unit_update_cgroup_members_masks(s);
6414b7c9
DS
590 }
591}
592
03b90d4b
LP
593static const char *migrate_callback(CGroupControllerMask mask, void *userdata) {
594 Unit *u = userdata;
595
596 assert(mask != 0);
597 assert(u);
598
599 while (u) {
600 if (u->cgroup_path &&
601 u->cgroup_realized &&
602 (u->cgroup_realized_mask & mask) == mask)
603 return u->cgroup_path;
604
605 u = UNIT_DEREF(u->slice);
606 }
607
608 return NULL;
609}
610
4ad49000 611static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
03b90d4b 612 _cleanup_free_ char *path = NULL;
bc432dc7 613 int r;
64747e2d 614
4ad49000 615 assert(u);
64747e2d 616
4ad49000
LP
617 path = unit_default_cgroup_path(u);
618 if (!path)
a94042fa 619 return log_oom();
64747e2d 620
0a1eb06d 621 r = hashmap_put(u->manager->cgroup_unit, path, u);
03b90d4b
LP
622 if (r < 0) {
623 log_error(r == -EEXIST ? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path, strerror(-r));
0a1eb06d 624 return r;
b58b8e11 625 }
03b90d4b 626 if (r > 0) {
b58b8e11 627 u->cgroup_path = path;
a94042fa 628 path = NULL;
b58b8e11
HH
629 }
630
03b90d4b
LP
631 /* First, create our own group */
632 r = cg_create_everywhere(u->manager->cgroup_supported, mask, u->cgroup_path);
633 if (r < 0) {
da927ba9 634 log_error_errno(r, "Failed to create cgroup %s: %m", u->cgroup_path);
03b90d4b
LP
635 return r;
636 }
637
638 /* Keep track that this is now realized */
4ad49000 639 u->cgroup_realized = true;
bc432dc7 640 u->cgroup_realized_mask = mask;
4ad49000 641
03b90d4b
LP
642 /* Then, possibly move things over */
643 r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);
644 if (r < 0)
da927ba9 645 log_warning_errno(r, "Failed to migrate cgroup from to %s: %m", u->cgroup_path);
03b90d4b 646
64747e2d
LP
647 return 0;
648}
649
6414b7c9 650static bool unit_has_mask_realized(Unit *u, CGroupControllerMask mask) {
bc432dc7
LP
651 assert(u);
652
653 return u->cgroup_realized && u->cgroup_realized_mask == mask;
6414b7c9
DS
654}
655
656/* Check if necessary controllers and attributes for a unit are in place.
657 *
658 * If so, do nothing.
659 * If not, create paths, move processes over, and set attributes.
660 *
661 * Returns 0 on success and < 0 on failure. */
db785129 662static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
4ad49000 663 CGroupControllerMask mask;
6414b7c9 664 int r;
64747e2d 665
4ad49000 666 assert(u);
64747e2d 667
4ad49000 668 if (u->in_cgroup_queue) {
71fda00f 669 LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
670 u->in_cgroup_queue = false;
671 }
64747e2d 672
6414b7c9 673 mask = unit_get_target_mask(u);
64747e2d 674
6414b7c9 675 if (unit_has_mask_realized(u, mask))
0a1eb06d 676 return 0;
64747e2d 677
4ad49000 678 /* First, realize parents */
6414b7c9 679 if (UNIT_ISSET(u->slice)) {
db785129 680 r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
6414b7c9
DS
681 if (r < 0)
682 return r;
683 }
4ad49000
LP
684
685 /* And then do the real work */
6414b7c9
DS
686 r = unit_create_cgroups(u, mask);
687 if (r < 0)
688 return r;
689
690 /* Finally, apply the necessary attributes. */
db785129 691 cgroup_context_apply(unit_get_cgroup_context(u), mask, u->cgroup_path, state);
6414b7c9
DS
692
693 return 0;
64747e2d
LP
694}
695
4ad49000 696static void unit_add_to_cgroup_queue(Unit *u) {
ecedd90f 697
4ad49000
LP
698 if (u->in_cgroup_queue)
699 return;
8e274523 700
71fda00f 701 LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
702 u->in_cgroup_queue = true;
703}
8c6db833 704
4ad49000 705unsigned manager_dispatch_cgroup_queue(Manager *m) {
db785129 706 ManagerState state;
4ad49000 707 unsigned n = 0;
db785129 708 Unit *i;
6414b7c9 709 int r;
ecedd90f 710
db785129
LP
711 state = manager_state(m);
712
4ad49000
LP
713 while ((i = m->cgroup_queue)) {
714 assert(i->in_cgroup_queue);
ecedd90f 715
db785129 716 r = unit_realize_cgroup_now(i, state);
6414b7c9 717 if (r < 0)
da927ba9 718 log_warning_errno(r, "Failed to realize cgroups for queued unit %s: %m", i->id);
0a1eb06d 719
4ad49000
LP
720 n++;
721 }
ecedd90f 722
4ad49000 723 return n;
8e274523
LP
724}
725
4ad49000
LP
726static void unit_queue_siblings(Unit *u) {
727 Unit *slice;
ca949c9d 728
4ad49000
LP
729 /* This adds the siblings of the specified unit and the
730 * siblings of all parent units to the cgroup queue. (But
731 * neither the specified unit itself nor the parents.) */
732
733 while ((slice = UNIT_DEREF(u->slice))) {
734 Iterator i;
735 Unit *m;
8f53a7b8 736
4ad49000
LP
737 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
738 if (m == u)
739 continue;
8e274523 740
6414b7c9
DS
741 /* Skip units that have a dependency on the slice
742 * but aren't actually in it. */
4ad49000 743 if (UNIT_DEREF(m->slice) != slice)
50159e6a 744 continue;
8e274523 745
6414b7c9
DS
746 /* No point in doing cgroup application for units
747 * without active processes. */
748 if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
749 continue;
750
751 /* If the unit doesn't need any new controllers
752 * and has current ones realized, it doesn't need
753 * any changes. */
754 if (unit_has_mask_realized(m, unit_get_target_mask(m)))
755 continue;
756
4ad49000 757 unit_add_to_cgroup_queue(m);
50159e6a
LP
758 }
759
4ad49000 760 u = slice;
8e274523 761 }
4ad49000
LP
762}
763
0a1eb06d 764int unit_realize_cgroup(Unit *u) {
4ad49000
LP
765 CGroupContext *c;
766
767 assert(u);
768
769 c = unit_get_cgroup_context(u);
770 if (!c)
0a1eb06d 771 return 0;
8e274523 772
4ad49000
LP
773 /* So, here's the deal: when realizing the cgroups for this
774 * unit, we need to first create all parents, but there's more
775 * actually: for the weight-based controllers we also need to
776 * make sure that all our siblings (i.e. units that are in the
73e231ab 777 * same slice as we are) have cgroups, too. Otherwise, things
4ad49000
LP
778 * would become very uneven as each of their processes would
779 * get as much resources as all our group together. This call
780 * will synchronously create the parent cgroups, but will
781 * defer work on the siblings to the next event loop
782 * iteration. */
ca949c9d 783
4ad49000
LP
784 /* Add all sibling slices to the cgroup queue. */
785 unit_queue_siblings(u);
786
6414b7c9 787 /* And realize this one now (and apply the values) */
db785129 788 return unit_realize_cgroup_now(u, manager_state(u->manager));
8e274523
LP
789}
790
4ad49000 791void unit_destroy_cgroup(Unit *u) {
8e274523
LP
792 int r;
793
4ad49000 794 assert(u);
8e274523 795
4ad49000
LP
796 if (!u->cgroup_path)
797 return;
8e274523 798
13b84ec7 799 r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE));
4ad49000 800 if (r < 0)
da927ba9 801 log_debug_errno(r, "Failed to destroy cgroup %s: %m", u->cgroup_path);
8e274523 802
0a1eb06d
LP
803 hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
804
4ad49000
LP
805 free(u->cgroup_path);
806 u->cgroup_path = NULL;
807 u->cgroup_realized = false;
bc432dc7 808 u->cgroup_realized_mask = 0;
0a1eb06d 809
8e274523
LP
810}
811
4ad49000
LP
812pid_t unit_search_main_pid(Unit *u) {
813 _cleanup_fclose_ FILE *f = NULL;
814 pid_t pid = 0, npid, mypid;
815
816 assert(u);
817
818 if (!u->cgroup_path)
819 return 0;
820
821 if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
822 return 0;
823
824 mypid = getpid();
825 while (cg_read_pid(f, &npid) > 0) {
826 pid_t ppid;
827
828 if (npid == pid)
829 continue;
8e274523 830
4ad49000
LP
831 /* Ignore processes that aren't our kids */
832 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
833 continue;
8e274523 834
4ad49000
LP
835 if (pid != 0) {
836 /* Dang, there's more than one daemonized PID
837 in this group, so we don't know what process
838 is the main process. */
839 pid = 0;
840 break;
841 }
8e274523 842
4ad49000 843 pid = npid;
8e274523
LP
844 }
845
4ad49000 846 return pid;
8e274523
LP
847}
848
8e274523 849int manager_setup_cgroup(Manager *m) {
9444b1f2 850 _cleanup_free_ char *path = NULL;
8e274523 851 int r;
8e274523
LP
852
853 assert(m);
854
35d2e7ec 855 /* 1. Determine hierarchy */
9444b1f2
LP
856 free(m->cgroup_root);
857 m->cgroup_root = NULL;
858
859 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
9156e799 860 if (r < 0) {
da927ba9 861 log_error_errno(r, "Cannot determine cgroup we are running in: %m");
a32360f1 862 return r;
12235040 863 }
8e274523 864
15c60e99
LP
865 /* LEGACY: Already in /system.slice? If so, let's cut this
866 * off. This is to support live upgrades from older systemd
867 * versions where PID 1 was moved there. */
9444b1f2 868 if (m->running_as == SYSTEMD_SYSTEM) {
0d8c31ff
ZJS
869 char *e;
870
9444b1f2 871 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
15c60e99
LP
872 if (!e)
873 e = endswith(m->cgroup_root, "/system");
9444b1f2
LP
874 if (e)
875 *e = 0;
0baf24dd 876 }
7ccfb64a 877
9444b1f2
LP
878 /* And make sure to store away the root value without trailing
879 * slash, even for the root dir, so that we can easily prepend
880 * it everywhere. */
881 if (streq(m->cgroup_root, "/"))
882 m->cgroup_root[0] = 0;
8e274523 883
35d2e7ec 884 /* 2. Show data */
9444b1f2 885 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
3474ae3c 886 if (r < 0) {
da927ba9 887 log_error_errno(r, "Cannot find cgroup mount point: %m");
a32360f1 888 return r;
12235040 889 }
8e274523 890
c6c18be3 891 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
0d8c31ff 892 if (!m->test_run) {
c6c18be3 893
0d8c31ff
ZJS
894 /* 3. Install agent */
895 if (m->running_as == SYSTEMD_SYSTEM) {
896 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
897 if (r < 0)
da927ba9 898 log_warning_errno(r, "Failed to install release agent, ignoring: %m");
0d8c31ff
ZJS
899 else if (r > 0)
900 log_debug("Installed release agent.");
901 else
902 log_debug("Release agent already installed.");
903 }
8e274523 904
0d8c31ff
ZJS
905 /* 4. Make sure we are in the root cgroup */
906 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
907 if (r < 0) {
da927ba9 908 log_error_errno(r, "Failed to create root cgroup hierarchy: %m");
0d8c31ff
ZJS
909 return r;
910 }
c6c18be3 911
0d8c31ff
ZJS
912 /* 5. And pin it, so that it cannot be unmounted */
913 safe_close(m->pin_cgroupfs_fd);
c6c18be3 914
0d8c31ff
ZJS
915 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
916 if (m->pin_cgroupfs_fd < 0) {
917 log_error("Failed to open pin file: %m");
918 return -errno;
919 }
920
921 /* 6. Always enable hierarchial support if it exists... */
922 cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
c6c18be3
LP
923 }
924
0d8c31ff 925 /* 7. Figure out which controllers are supported */
4ad49000 926 m->cgroup_supported = cg_mask_supported();
9156e799 927
a32360f1 928 return 0;
8e274523
LP
929}
930
c6c18be3 931void manager_shutdown_cgroup(Manager *m, bool delete) {
8e274523
LP
932 assert(m);
933
9444b1f2
LP
934 /* We can't really delete the group, since we are in it. But
935 * let's trim it. */
936 if (delete && m->cgroup_root)
937 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
8e274523 938
03e334a1 939 m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd);
c6c18be3 940
9444b1f2
LP
941 free(m->cgroup_root);
942 m->cgroup_root = NULL;
8e274523
LP
943}
944
4ad49000 945Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
acb14d31 946 char *p;
4ad49000 947 Unit *u;
acb14d31
LP
948
949 assert(m);
950 assert(cgroup);
acb14d31 951
4ad49000
LP
952 u = hashmap_get(m->cgroup_unit, cgroup);
953 if (u)
954 return u;
acb14d31 955
8e70580b 956 p = strdupa(cgroup);
acb14d31
LP
957 for (;;) {
958 char *e;
959
960 e = strrchr(p, '/');
4ad49000
LP
961 if (e == p || !e)
962 return NULL;
acb14d31
LP
963
964 *e = 0;
965
4ad49000
LP
966 u = hashmap_get(m->cgroup_unit, p);
967 if (u)
968 return u;
acb14d31
LP
969 }
970}
971
4ad49000
LP
972Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
973 _cleanup_free_ char *cgroup = NULL;
acb14d31 974 int r;
8e274523 975
8c47c732
LP
976 assert(m);
977
978 if (pid <= 1)
979 return NULL;
980
4ad49000
LP
981 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
982 if (r < 0)
6dde1f33
LP
983 return NULL;
984
4ad49000 985 return manager_get_unit_by_cgroup(m, cgroup);
6dde1f33 986}
4fbf50b3 987
4ad49000
LP
988int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
989 Unit *u;
990 int r;
4fbf50b3 991
4ad49000
LP
992 assert(m);
993 assert(cgroup);
4fbf50b3 994
4ad49000 995 u = manager_get_unit_by_cgroup(m, cgroup);
b56c28c3 996 if (u) {
06025d91
LP
997 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
998 if (r > 0) {
999 if (UNIT_VTABLE(u)->notify_cgroup_empty)
1000 UNIT_VTABLE(u)->notify_cgroup_empty(u);
b56c28c3 1001
06025d91
LP
1002 unit_add_to_gc_queue(u);
1003 }
b56c28c3 1004 }
2633eb83 1005
4ad49000 1006 return 0;
4fbf50b3
LP
1007}
1008
4ad49000
LP
/* String names of the CGroupDevicePolicy values, indexed by enum value. */
static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
        [CGROUP_AUTO] = "auto",
        [CGROUP_CLOSED] = "closed",
        [CGROUP_STRICT] = "strict",
};

/* Generates the string lookup helpers for the table above, among them the
 * cgroup_device_policy_to_string() used when dumping a context. */
DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);