]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/cgroup.c
Merge pull request #3329 from htejun/dbus-cgroup-fixes
[thirdparty/systemd.git] / src / core / cgroup.c
CommitLineData
8e274523
LP
1/***
2 This file is part of systemd.
3
4ad49000 4 Copyright 2013 Lennart Poettering
8e274523
LP
5
6 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 14 Lesser General Public License for more details.
8e274523 15
5430f7f2 16 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
c6c18be3 20#include <fcntl.h>
e41969e3 21#include <fnmatch.h>
8c6db833 22
b5efdb8a 23#include "alloc-util.h"
03a7b521 24#include "cgroup-util.h"
3ffd4af2
LP
25#include "cgroup.h"
26#include "fd-util.h"
0d39fa9c 27#include "fileio.h"
77601719 28#include "fs-util.h"
6bedfcbb 29#include "parse-util.h"
9eb977db 30#include "path-util.h"
03a7b521 31#include "process-util.h"
9444b1f2 32#include "special.h"
8b43440b 33#include "string-table.h"
07630cea 34#include "string-util.h"
13c31542 35#include "stdio-util.h"
8e274523 36
9a054909
LP
37#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
38
4ad49000
LP
/* Initialize a CGroupContext to kernel default settings. The structure
 * is assumed to be zero-initialized already; only fields whose "not
 * configured" sentinel is non-zero are set explicitly here. */
void cgroup_context_init(CGroupContext *c) {
        assert(c);

        /* Initialize everything to the kernel defaults, assuming the
         * structure is preinitialized to 0 */

        /* INVALID sentinels mean "unset, fall back to kernel default" */
        c->cpu_shares = CGROUP_CPU_SHARES_INVALID;
        c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID;
        c->cpu_quota_per_sec_usec = USEC_INFINITY;

        c->memory_limit = (uint64_t) -1; /* (uint64_t) -1 == no limit */

        c->io_weight = CGROUP_WEIGHT_INVALID;
        c->startup_io_weight = CGROUP_WEIGHT_INVALID;

        c->blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID;
        c->startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID;

        c->tasks_max = (uint64_t) -1; /* no limit */
}
8e274523 59
4ad49000
LP
/* Unlink one DeviceAllow entry from the context's list and release its
 * memory. The entry must be a member of c->device_allow. */
void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
        assert(c);
        assert(a);

        LIST_REMOVE(device_allow, c->device_allow, a);
        free(a->path);
        free(a);
}
68
13c31542
TH
/* Unlink one IODeviceWeight entry from the context's list and free it. */
void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w) {
        assert(c);
        assert(w);

        LIST_REMOVE(device_weights, c->io_device_weights, w);
        free(w->path);
        free(w);
}
77
/* Unlink one IODeviceLimit entry from the context's list and free it. */
void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l) {
        assert(c);
        assert(l);

        LIST_REMOVE(device_limits, c->io_device_limits, l);
        free(l->path);
        free(l);
}
86
4ad49000
LP
/* Unlink one legacy BlockIODeviceWeight entry from the context's list
 * and free it. */
void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
        assert(c);
        assert(w);

        LIST_REMOVE(device_weights, c->blockio_device_weights, w);
        free(w->path);
        free(w);
}
95
/* Unlink one legacy BlockIODeviceBandwidth entry from the context's
 * list and free it. */
void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
        assert(c);
        assert(b);

        LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
        free(b->path);
        free(b);
}
104
/* Release all heap-allocated members of a CGroupContext. Drains each
 * per-device list via the matching free helper; the context structure
 * itself is not freed (it is usually embedded in a larger object). */
void cgroup_context_done(CGroupContext *c) {
        assert(c);

        while (c->io_device_weights)
                cgroup_context_free_io_device_weight(c, c->io_device_weights);

        while (c->io_device_limits)
                cgroup_context_free_io_device_limit(c, c->io_device_limits);

        while (c->blockio_device_weights)
                cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);

        while (c->blockio_device_bandwidths)
                cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);

        while (c->device_allow)
                cgroup_context_free_device_allow(c, c->device_allow);
}
123
124void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
13c31542
TH
125 CGroupIODeviceLimit *il;
126 CGroupIODeviceWeight *iw;
4ad49000
LP
127 CGroupBlockIODeviceBandwidth *b;
128 CGroupBlockIODeviceWeight *w;
129 CGroupDeviceAllow *a;
9a054909 130 char u[FORMAT_TIMESPAN_MAX];
4ad49000
LP
131
132 assert(c);
133 assert(f);
134
135 prefix = strempty(prefix);
136
137 fprintf(f,
138 "%sCPUAccounting=%s\n"
13c31542 139 "%sIOAccounting=%s\n"
4ad49000
LP
140 "%sBlockIOAccounting=%s\n"
141 "%sMemoryAccounting=%s\n"
d53d9474
LP
142 "%sTasksAccounting=%s\n"
143 "%sCPUShares=%" PRIu64 "\n"
144 "%sStartupCPUShares=%" PRIu64 "\n"
b2f8b02e 145 "%sCPUQuotaPerSecSec=%s\n"
13c31542
TH
146 "%sIOWeight=%" PRIu64 "\n"
147 "%sStartupIOWeight=%" PRIu64 "\n"
d53d9474
LP
148 "%sBlockIOWeight=%" PRIu64 "\n"
149 "%sStartupBlockIOWeight=%" PRIu64 "\n"
4ad49000 150 "%sMemoryLimit=%" PRIu64 "\n"
03a7b521 151 "%sTasksMax=%" PRIu64 "\n"
a931ad47
LP
152 "%sDevicePolicy=%s\n"
153 "%sDelegate=%s\n",
4ad49000 154 prefix, yes_no(c->cpu_accounting),
13c31542 155 prefix, yes_no(c->io_accounting),
4ad49000
LP
156 prefix, yes_no(c->blockio_accounting),
157 prefix, yes_no(c->memory_accounting),
d53d9474 158 prefix, yes_no(c->tasks_accounting),
4ad49000 159 prefix, c->cpu_shares,
95ae05c0 160 prefix, c->startup_cpu_shares,
b1d6dcf5 161 prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
13c31542
TH
162 prefix, c->io_weight,
163 prefix, c->startup_io_weight,
4ad49000 164 prefix, c->blockio_weight,
95ae05c0 165 prefix, c->startup_blockio_weight,
4ad49000 166 prefix, c->memory_limit,
03a7b521 167 prefix, c->tasks_max,
a931ad47
LP
168 prefix, cgroup_device_policy_to_string(c->device_policy),
169 prefix, yes_no(c->delegate));
4ad49000
LP
170
171 LIST_FOREACH(device_allow, a, c->device_allow)
172 fprintf(f,
173 "%sDeviceAllow=%s %s%s%s\n",
174 prefix,
175 a->path,
176 a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
177
13c31542
TH
178 LIST_FOREACH(device_weights, iw, c->io_device_weights)
179 fprintf(f,
180 "%sIODeviceWeight=%s %" PRIu64,
181 prefix,
182 iw->path,
183 iw->weight);
184
185 LIST_FOREACH(device_limits, il, c->io_device_limits) {
186 char buf[FORMAT_BYTES_MAX];
9be57249
TH
187 CGroupIOLimitType type;
188
189 for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++)
190 if (il->limits[type] != cgroup_io_limit_defaults[type])
191 fprintf(f,
192 "%s%s=%s %s\n",
193 prefix,
194 cgroup_io_limit_type_to_string(type),
195 il->path,
196 format_bytes(buf, sizeof(buf), il->limits[type]));
13c31542
TH
197 }
198
4ad49000
LP
199 LIST_FOREACH(device_weights, w, c->blockio_device_weights)
200 fprintf(f,
d53d9474 201 "%sBlockIODeviceWeight=%s %" PRIu64,
4ad49000
LP
202 prefix,
203 w->path,
204 w->weight);
205
206 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
207 char buf[FORMAT_BYTES_MAX];
208
979d0311
TH
209 if (b->rbps != CGROUP_LIMIT_MAX)
210 fprintf(f,
211 "%sBlockIOReadBandwidth=%s %s\n",
212 prefix,
213 b->path,
214 format_bytes(buf, sizeof(buf), b->rbps));
215 if (b->wbps != CGROUP_LIMIT_MAX)
216 fprintf(f,
217 "%sBlockIOWriteBandwidth=%s %s\n",
218 prefix,
219 b->path,
220 format_bytes(buf, sizeof(buf), b->wbps));
4ad49000
LP
221 }
222}
223
/* Resolve a path to the block device (dev_t) backing it.
 *
 * If p is itself a block device node, return its st_rdev. Otherwise,
 * if p lives on a real (major != 0) file system device, return the
 * device the file is stored on, upgraded from partition to whole disk
 * where possible. Virtual file systems (major 0) yield -ENODEV.
 *
 * Returns 0 on success, negative errno on failure (also logged). */
static int lookup_block_device(const char *p, dev_t *dev) {
        struct stat st;
        int r;

        assert(p);
        assert(dev);

        r = stat(p, &st);
        if (r < 0)
                return log_warning_errno(errno, "Couldn't stat device %s: %m", p);

        if (S_ISBLK(st.st_mode))
                *dev = st.st_rdev;
        else if (major(st.st_dev) != 0) {
                /* If this is not a device node then find the block
                 * device this file is stored on */
                *dev = st.st_dev;

                /* If this is a partition, try to get the originating
                 * block device; best effort, errors are ignored */
                block_get_whole_disk(*dev, dev);
        } else {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        return 0;
}
252
4ad49000
LP
253static int whitelist_device(const char *path, const char *node, const char *acc) {
254 char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
255 struct stat st;
8c6db833 256 int r;
8e274523 257
4ad49000
LP
258 assert(path);
259 assert(acc);
8e274523 260
4ad49000
LP
261 if (stat(node, &st) < 0) {
262 log_warning("Couldn't stat device %s", node);
263 return -errno;
264 }
265
266 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
267 log_warning("%s is not a device.", node);
268 return -ENODEV;
269 }
270
271 sprintf(buf,
272 "%c %u:%u %s",
273 S_ISCHR(st.st_mode) ? 'c' : 'b',
274 major(st.st_rdev), minor(st.st_rdev),
275 acc);
276
277 r = cg_set_attribute("devices", path, "devices.allow", buf);
1aeab12b 278 if (r < 0)
077ba06e 279 log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
714e2e1d 280 "Failed to set devices.allow on %s: %m", path);
4ad49000
LP
281
282 return r;
8e274523
LP
283}
284
90060676
LP
/* Whitelist all devices whose driver name matches the fnmatch() pattern
 * "name" by scanning /proc/devices and writing "<type> <major>:* <acc>"
 * entries into the cgroup's devices.allow.
 *
 * type is 'b' (block) or 'c' (character); acc is the access string.
 * Per-device write failures are logged but do not abort the scan.
 * Returns 0 on success, negative errno if /proc/devices can't be read. */
static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
        _cleanup_fclose_ FILE *f = NULL;
        char line[LINE_MAX];
        bool good = false;
        int r;

        assert(path);
        assert(acc);
        assert(type == 'b' || type == 'c');

        f = fopen("/proc/devices", "re");
        if (!f)
                return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s (%c): %m", name, type);

        FOREACH_LINE(line, f, goto fail) {
                char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
                unsigned maj;

                truncate_nl(line);

                /* /proc/devices has two sections; "good" tracks whether we
                 * are inside the section matching the requested type. */
                if (type == 'c' && streq(line, "Character devices:")) {
                        good = true;
                        continue;
                }

                if (type == 'b' && streq(line, "Block devices:")) {
                        good = true;
                        continue;
                }

                if (isempty(line)) {
                        /* blank line ends the current section */
                        good = false;
                        continue;
                }

                if (!good)
                        continue;

                /* Each entry is "<major> <driver-name>" */
                p = strstrip(line);

                w = strpbrk(p, WHITESPACE);
                if (!w)
                        continue;
                *w = 0;

                r = safe_atou(p, &maj);
                if (r < 0)
                        continue;
                if (maj <= 0)
                        continue;

                w++;
                w += strspn(w, WHITESPACE);

                /* "name" is the fnmatch pattern, w the driver name */
                if (fnmatch(name, w, 0) != 0)
                        continue;

                sprintf(buf,
                        "%c %u:* %s",
                        type,
                        maj,
                        acc);

                r = cg_set_attribute("devices", path, "devices.allow", buf);
                if (r < 0)
                        log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to set devices.allow on %s: %m", path);
        }

        return 0;

fail:
        log_warning_errno(errno, "Failed to read /proc/devices: %m");
        return -errno;
}
360
508c45da 361static bool cgroup_context_has_io_config(CGroupContext *c) {
538b4852
TH
362 return c->io_accounting ||
363 c->io_weight != CGROUP_WEIGHT_INVALID ||
364 c->startup_io_weight != CGROUP_WEIGHT_INVALID ||
365 c->io_device_weights ||
366 c->io_device_limits;
367}
368
508c45da 369static bool cgroup_context_has_blockio_config(CGroupContext *c) {
538b4852
TH
370 return c->blockio_accounting ||
371 c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
372 c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
373 c->blockio_device_weights ||
374 c->blockio_device_bandwidths;
375}
376
508c45da 377static uint64_t cgroup_context_io_weight(CGroupContext *c, ManagerState state) {
64faf04c
TH
378 if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
379 c->startup_io_weight != CGROUP_WEIGHT_INVALID)
380 return c->startup_io_weight;
381 else if (c->io_weight != CGROUP_WEIGHT_INVALID)
382 return c->io_weight;
383 else
384 return CGROUP_WEIGHT_DEFAULT;
385}
386
508c45da 387static uint64_t cgroup_context_blkio_weight(CGroupContext *c, ManagerState state) {
64faf04c
TH
388 if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
389 c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID)
390 return c->startup_blockio_weight;
391 else if (c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID)
392 return c->blockio_weight;
393 else
394 return CGROUP_BLKIO_WEIGHT_DEFAULT;
395}
396
508c45da 397static uint64_t cgroup_weight_blkio_to_io(uint64_t blkio_weight) {
538b4852
TH
398 return CLAMP(blkio_weight * CGROUP_WEIGHT_DEFAULT / CGROUP_BLKIO_WEIGHT_DEFAULT,
399 CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
400}
401
508c45da 402static uint64_t cgroup_weight_io_to_blkio(uint64_t io_weight) {
538b4852
TH
403 return CLAMP(io_weight * CGROUP_BLKIO_WEIGHT_DEFAULT / CGROUP_WEIGHT_DEFAULT,
404 CGROUP_BLKIO_WEIGHT_MIN, CGROUP_BLKIO_WEIGHT_MAX);
405}
406
/* Write a per-device weight ("<maj>:<min> <weight>") to io.weight for
 * the cgroup at path. Device lookup or write failures are logged and
 * otherwise ignored (best effort). */
static void cgroup_apply_io_device_weight(const char *path, const char *dev_path, uint64_t io_weight) {
        char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
        dev_t dev;
        int r;

        r = lookup_block_device(dev_path, &dev);
        if (r < 0)
                return; /* already logged */

        xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), io_weight);
        r = cg_set_attribute("io", path, "io.weight", buf);
        if (r < 0)
                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                               "Failed to set io.weight on %s: %m", path);
}
422
/* Write a per-device weight to the legacy blkio.weight_device attribute
 * for the cgroup at path. Failures are logged, best effort. */
static void cgroup_apply_blkio_device_weight(const char *path, const char *dev_path, uint64_t blkio_weight) {
        char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
        dev_t dev;
        int r;

        r = lookup_block_device(dev_path, &dev);
        if (r < 0)
                return; /* already logged */

        xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), blkio_weight);
        r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
        if (r < 0)
                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                               "Failed to set blkio.weight_device on %s: %m", path);
}
438
/* Write all four IO limits for one device to the unified io.max
 * attribute ("<maj>:<min> rbps=... wbps=... riops=... wiops=...").
 * Limits equal to their defaults are written back as "max"/"0" so they
 * get reset in the kernel.
 *
 * Returns the number of limits that differ from the defaults; callers
 * use a return of 0 to know the entry carries no configuration and may
 * be freed. */
static unsigned cgroup_apply_io_device_limit(const char *path, const char *dev_path, uint64_t *limits) {
        char limit_bufs[_CGROUP_IO_LIMIT_TYPE_MAX][DECIMAL_STR_MAX(uint64_t)];
        char buf[DECIMAL_STR_MAX(dev_t)*2+2+(6+DECIMAL_STR_MAX(uint64_t)+1)*4];
        CGroupIOLimitType type;
        dev_t dev;
        unsigned n = 0;
        int r;

        r = lookup_block_device(dev_path, &dev);
        if (r < 0)
                return 0;

        for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++) {
                if (limits[type] != cgroup_io_limit_defaults[type]) {
                        xsprintf(limit_bufs[type], "%" PRIu64, limits[type]);
                        n++;
                } else {
                        /* reset to the kernel default for this limit */
                        xsprintf(limit_bufs[type], "%s", limits[type] == CGROUP_LIMIT_MAX ? "max" : "0");
                }
        }

        xsprintf(buf, "%u:%u rbps=%s wbps=%s riops=%s wiops=%s\n", major(dev), minor(dev),
                 limit_bufs[CGROUP_IO_RBPS_MAX], limit_bufs[CGROUP_IO_WBPS_MAX],
                 limit_bufs[CGROUP_IO_RIOPS_MAX], limit_bufs[CGROUP_IO_WIOPS_MAX]);
        r = cg_set_attribute("io", path, "io.max", buf);
        if (r < 0)
                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                               "Failed to set io.max on %s: %m", path);
        return n;
}
469
/* Write read/write bandwidth limits for one device to the legacy
 * blkio.throttle.{read,write}_bps_device attributes. Note that the
 * values are written unconditionally — writing CGROUP_LIMIT_MAX resets
 * the corresponding kernel limit.
 *
 * Returns how many of the two limits are actually configured (not MAX);
 * callers use a return of 0 to know the entry may be freed. */
static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *dev_path, uint64_t rbps, uint64_t wbps) {
        char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
        dev_t dev;
        unsigned n = 0;
        int r;

        r = lookup_block_device(dev_path, &dev);
        if (r < 0)
                return 0;

        if (rbps != CGROUP_LIMIT_MAX)
                n++;
        sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), rbps);
        r = cg_set_attribute("blkio", path, "blkio.throttle.read_bps_device", buf);
        if (r < 0)
                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                               "Failed to set blkio.throttle.read_bps_device on %s: %m", path);

        if (wbps != CGROUP_LIMIT_MAX)
                n++;
        sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), wbps);
        r = cg_set_attribute("blkio", path, "blkio.throttle.write_bps_device", buf);
        if (r < 0)
                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                               "Failed to set blkio.throttle.write_bps_device on %s: %m", path);

        return n;
}
498
/* Apply the settings of a CGroupContext to the kernel cgroup at "path",
 * restricted to the controllers in "mask". "state" selects between the
 * startup and regular variants of the CPU/IO/blkio weights.
 *
 * Root-cgroup-unsupported attributes are skipped silently, and errors
 * caused by read-only cgroup trees (containers) or vanished cgroups
 * (EROFS/ENOENT) are demoted to debug-level logging throughout. */
void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) {
        bool is_root;
        int r;

        assert(c);
        assert(path);

        if (mask == 0)
                return;

        /* Some cgroup attributes are not supported on the root cgroup,
         * hence silently ignore */
        is_root = isempty(path) || path_equal(path, "/");
        if (is_root)
                /* Make sure we don't try to display messages with an empty path. */
                path = "/";

        /* We generally ignore errors caused by read-only mounted
         * cgroup trees (assuming we are running in a container then),
         * and missing cgroups, i.e. EROFS and ENOENT. */

        if ((mask & CGROUP_MASK_CPU) && !is_root) {
                char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];

                /* cpu.shares: startup value wins during startup, then the
                 * configured value, then the kernel default. */
                sprintf(buf, "%" PRIu64 "\n",
                        IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->startup_cpu_shares :
                        c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT);
                r = cg_set_attribute("cpu", path, "cpu.shares", buf);
                if (r < 0)
                        log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to set cpu.shares on %s: %m", path);

                /* Fixed CFS period; the quota below is scaled to it. */
                sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
                r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
                if (r < 0)
                        log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to set cpu.cfs_period_us on %s: %m", path);

                if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
                        /* Convert the per-second quota into per-period units */
                        sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
                        r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
                } else
                        r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1"); /* -1 == no quota */
                if (r < 0)
                        log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to set cpu.cfs_quota_us on %s: %m", path);
        }

        if (mask & CGROUP_MASK_IO) {
                bool has_io = cgroup_context_has_io_config(c);
                bool has_blockio = cgroup_context_has_blockio_config(c);

                if (!is_root) {
                        char buf[8+DECIMAL_STR_MAX(uint64_t)+1];
                        uint64_t weight;

                        /* Prefer native io settings; otherwise translate the
                         * legacy blkio settings onto the io range. */
                        if (has_io)
                                weight = cgroup_context_io_weight(c, state);
                        else if (has_blockio)
                                weight = cgroup_weight_blkio_to_io(cgroup_context_blkio_weight(c, state));
                        else
                                weight = CGROUP_WEIGHT_DEFAULT;

                        xsprintf(buf, "default %" PRIu64 "\n", weight);
                        r = cg_set_attribute("io", path, "io.weight", buf);
                        if (r < 0)
                                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                                               "Failed to set io.weight on %s: %m", path);

                        if (has_io) {
                                CGroupIODeviceWeight *w;

                                /* FIXME: no way to reset this list */
                                LIST_FOREACH(device_weights, w, c->io_device_weights)
                                        cgroup_apply_io_device_weight(path, w->path, w->weight);
                        } else if (has_blockio) {
                                CGroupBlockIODeviceWeight *w;

                                /* FIXME: no way to reset this list */
                                LIST_FOREACH(device_weights, w, c->blockio_device_weights)
                                        cgroup_apply_io_device_weight(path, w->path, cgroup_weight_blkio_to_io(w->weight));
                        }
                }

                /* Apply limits and free ones without config. */
                if (has_io) {
                        CGroupIODeviceLimit *l, *next;

                        LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) {
                                if (!cgroup_apply_io_device_limit(path, l->path, l->limits))
                                        cgroup_context_free_io_device_limit(c, l);
                        }
                } else if (has_blockio) {
                        CGroupBlockIODeviceBandwidth *b, *next;

                        LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) {
                                uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX];
                                CGroupIOLimitType type;

                                /* Map the two legacy bandwidth fields onto the
                                 * full io.max limit vector */
                                for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++)
                                        limits[type] = cgroup_io_limit_defaults[type];

                                limits[CGROUP_IO_RBPS_MAX] = b->rbps;
                                limits[CGROUP_IO_WBPS_MAX] = b->wbps;

                                if (!cgroup_apply_io_device_limit(path, b->path, limits))
                                        cgroup_context_free_blockio_device_bandwidth(c, b);
                        }
                }
        }

        if (mask & CGROUP_MASK_BLKIO) {
                bool has_io = cgroup_context_has_io_config(c);
                bool has_blockio = cgroup_context_has_blockio_config(c);

                if (!is_root) {
                        char buf[DECIMAL_STR_MAX(uint64_t)+1];
                        uint64_t weight;

                        /* Mirror image of the io branch above: prefer native
                         * blkio settings, otherwise translate io settings. */
                        if (has_blockio)
                                weight = cgroup_context_blkio_weight(c, state);
                        else if (has_io)
                                weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state));
                        else
                                weight = CGROUP_BLKIO_WEIGHT_DEFAULT;

                        xsprintf(buf, "%" PRIu64 "\n", weight);
                        r = cg_set_attribute("blkio", path, "blkio.weight", buf);
                        if (r < 0)
                                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                                               "Failed to set blkio.weight on %s: %m", path);

                        if (has_blockio) {
                                CGroupBlockIODeviceWeight *w;

                                /* FIXME: no way to reset this list */
                                LIST_FOREACH(device_weights, w, c->blockio_device_weights)
                                        cgroup_apply_blkio_device_weight(path, w->path, w->weight);
                        } else if (has_io) {
                                CGroupIODeviceWeight *w;

                                /* FIXME: no way to reset this list */
                                LIST_FOREACH(device_weights, w, c->io_device_weights)
                                        cgroup_apply_blkio_device_weight(path, w->path, cgroup_weight_io_to_blkio(w->weight));
                        }
                }

                /* Apply limits and free ones without config. */
                if (has_blockio) {
                        CGroupBlockIODeviceBandwidth *b, *next;

                        LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) {
                                if (!cgroup_apply_blkio_device_limit(path, b->path, b->rbps, b->wbps))
                                        cgroup_context_free_blockio_device_bandwidth(c, b);
                        }
                } else if (has_io) {
                        CGroupIODeviceLimit *l, *next;

                        LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) {
                                if (!cgroup_apply_blkio_device_limit(path, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]))
                                        cgroup_context_free_io_device_limit(c, l);
                        }
                }
        }

        if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
                if (c->memory_limit != (uint64_t) -1) {
                        char buf[DECIMAL_STR_MAX(uint64_t) + 1];

                        sprintf(buf, "%" PRIu64 "\n", c->memory_limit);

                        /* Attribute name differs between legacy (v1) and
                         * unified (v2) hierarchies */
                        if (cg_unified() <= 0)
                                r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
                        else
                                r = cg_set_attribute("memory", path, "memory.max", buf);

                } else {
                        if (cg_unified() <= 0)
                                r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
                        else
                                r = cg_set_attribute("memory", path, "memory.max", "max");
                }

                if (r < 0)
                        log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to set memory.limit_in_bytes/memory.max on %s: %m", path);
        }

        if ((mask & CGROUP_MASK_DEVICES) && !is_root) {
                CGroupDeviceAllow *a;

                /* Changing the devices list of a populated cgroup
                 * might result in EINVAL, hence ignore EINVAL
                 * here. */

                /* Reset the list: deny-all when a policy/whitelist is
                 * configured, allow-all otherwise */
                if (c->device_allow || c->device_policy != CGROUP_AUTO)
                        r = cg_set_attribute("devices", path, "devices.deny", "a");
                else
                        r = cg_set_attribute("devices", path, "devices.allow", "a");
                if (r < 0)
                        log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to reset devices.list on %s: %m", path);

                if (c->device_policy == CGROUP_CLOSED ||
                    (c->device_policy == CGROUP_AUTO && c->device_allow)) {
                        /* Standard device nodes every whitelist-using service
                         * gets access to */
                        static const char auto_devices[] =
                                "/dev/null\0" "rwm\0"
                                "/dev/zero\0" "rwm\0"
                                "/dev/full\0" "rwm\0"
                                "/dev/random\0" "rwm\0"
                                "/dev/urandom\0" "rwm\0"
                                "/dev/tty\0" "rwm\0"
                                "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */

                        const char *x, *y;

                        NULSTR_FOREACH_PAIR(x, y, auto_devices)
                                whitelist_device(path, x, y);

                        whitelist_major(path, "pts", 'c', "rw");
                        whitelist_major(path, "kdbus", 'c', "rw");
                        whitelist_major(path, "kdbus/*", 'c', "rw");
                }

                LIST_FOREACH(device_allow, a, c->device_allow) {
                        char acc[4];
                        unsigned k = 0;

                        /* Build the access string ("r", "w", "m" subset) */
                        if (a->r)
                                acc[k++] = 'r';
                        if (a->w)
                                acc[k++] = 'w';
                        if (a->m)
                                acc[k++] = 'm';

                        if (k == 0)
                                continue;

                        acc[k++] = 0;

                        /* Three entry forms: a literal node path, or
                         * "block-<driver>"/"char-<driver>" major patterns */
                        if (startswith(a->path, "/dev/"))
                                whitelist_device(path, a->path, acc);
                        else if (startswith(a->path, "block-"))
                                whitelist_major(path, a->path + 6, 'b', acc);
                        else if (startswith(a->path, "char-"))
                                whitelist_major(path, a->path + 5, 'c', acc);
                        else
                                log_debug("Ignoring device %s while writing cgroup attribute.", a->path);
                }
        }

        if ((mask & CGROUP_MASK_PIDS) && !is_root) {

                if (c->tasks_max != (uint64_t) -1) {
                        char buf[DECIMAL_STR_MAX(uint64_t) + 2];

                        sprintf(buf, "%" PRIu64 "\n", c->tasks_max);
                        r = cg_set_attribute("pids", path, "pids.max", buf);
                } else
                        r = cg_set_attribute("pids", path, "pids.max", "max");

                if (r < 0)
                        log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to set pids.max on %s: %m", path);
        }
}
765
efdb0237
LP
/* Compute the set of cgroup controllers this context's settings require:
 * a controller bit is included as soon as any related setting deviates
 * from its "unset" sentinel. */
CGroupMask cgroup_context_get_mask(CGroupContext *c) {
        CGroupMask mask = 0;

        /* Figure out which controllers we need */

        if (c->cpu_accounting ||
            c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
            c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ||
            c->cpu_quota_per_sec_usec != USEC_INFINITY)
                mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU;

        /* io and blkio settings imply each other's controller, since
         * either may be translated onto the other hierarchy */
        if (cgroup_context_has_io_config(c) || cgroup_context_has_blockio_config(c))
                mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;

        if (c->memory_accounting ||
            c->memory_limit != (uint64_t) -1)
                mask |= CGROUP_MASK_MEMORY;

        if (c->device_allow ||
            c->device_policy != CGROUP_AUTO)
                mask |= CGROUP_MASK_DEVICES;

        if (c->tasks_accounting ||
            c->tasks_max != (uint64_t) -1)
                mask |= CGROUP_MASK_PIDS;

        return mask;
}
794
CGroupMask unit_get_own_mask(Unit *u) {
        CGroupContext *c;

        /* Returns the mask of controllers the unit needs for itself */

        c = unit_get_cgroup_context(u);
        if (!c)
                /* Unit type has no cgroup context at all */
                return 0;

        /* If delegation is turned on, then turn on all cgroups,
         * unless we are on the legacy hierarchy and the process we
         * fork into it is known to drop privileges, and hence
         * shouldn't get access to the controllers.
         *
         * Note that on the unified hierarchy it is safe to delegate
         * controllers to unprivileged services. */

        if (c->delegate) {
                ExecContext *e;

                e = unit_get_exec_context(u);
                if (!e ||
                    exec_context_maintains_privileges(e) ||
                    cg_unified() > 0)
                        return _CGROUP_MASK_ALL;
        }

        /* Otherwise just the controllers the context's settings imply */
        return cgroup_context_get_mask(c);
}
824
CGroupMask unit_get_members_mask(Unit *u) {
        assert(u);

        /* Returns the mask of controllers all of the unit's children
         * require, merged */

        /* Cached value; invalidated via cgroup_members_mask_valid */
        if (u->cgroup_members_mask_valid)
                return u->cgroup_members_mask;

        u->cgroup_members_mask = 0;

        /* Only slices have cgroup children */
        if (u->type == UNIT_SLICE) {
                Unit *member;
                Iterator i;

                SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {

                        if (member == u)
                                continue;

                        /* Only direct children of this slice count */
                        if (UNIT_DEREF(member->slice) != u)
                                continue;

                        /* Recurses into grandchildren via the member's own
                         * members mask */
                        u->cgroup_members_mask |=
                                unit_get_own_mask(member) |
                                unit_get_members_mask(member);
                }
        }

        u->cgroup_members_mask_valid = true;
        return u->cgroup_members_mask;
}
857
CGroupMask unit_get_siblings_mask(Unit *u) {
        assert(u);

        /* Returns the mask of controllers all of the unit's siblings
         * require, i.e. the members mask of the unit's parent slice
         * if there is one. */

        if (UNIT_ISSET(u->slice))
                return unit_get_members_mask(UNIT_DEREF(u->slice));

        /* No parent slice: fall back to the unit's own subtree */
        return unit_get_own_mask(u) | unit_get_members_mask(u);
}
870
efdb0237
LP
CGroupMask unit_get_subtree_mask(Unit *u) {

        /* Returns the mask of this subtree, meaning of the group
         * itself and its children. */

        return unit_get_own_mask(u) | unit_get_members_mask(u);
}
878
CGroupMask unit_get_target_mask(Unit *u) {
        CGroupMask mask;

        /* This returns the cgroup mask of all controllers to enable
         * for a specific cgroup, i.e. everything it needs itself,
         * plus all that its children need, plus all that its siblings
         * need. This is primarily useful on the legacy cgroup
         * hierarchy, where we need to duplicate each cgroup in each
         * hierarchy that shall be enabled for it. */

        mask = unit_get_own_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
        /* Never report controllers the kernel doesn't actually support */
        mask &= u->manager->cgroup_supported;

        return mask;
}
894
CGroupMask unit_get_enable_mask(Unit *u) {
        CGroupMask mask;

        /* This returns the cgroup mask of all controllers to enable
         * for the children of a specific cgroup. This is primarily
         * useful for the unified cgroup hierarchy, where each cgroup
         * controls which controllers are enabled for its children. */

        mask = unit_get_members_mask(u);
        /* Restrict to controllers the kernel actually supports */
        mask &= u->manager->cgroup_supported;

        return mask;
}
908
/* Recurse from a unit up through its containing slices, propagating
 * mask bits upward. A unit is also member of itself. */
void unit_update_cgroup_members_masks(Unit *u) {
        CGroupMask m;
        bool more;

        assert(u);

        /* Calculate subtree mask */
        m = unit_get_subtree_mask(u);

        /* See if anything changed from the previous invocation. If
         * not, we're done. */
        if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask)
                return;

        /* "more" is true iff bits were only added relative to the
         * previously-known mask — then a cheap OR into the parent
         * suffices; otherwise the parent must recalculate. */
        more =
                u->cgroup_subtree_mask_valid &&
                ((m & ~u->cgroup_subtree_mask) != 0) &&
                ((~m & u->cgroup_subtree_mask) == 0);

        u->cgroup_subtree_mask = m;
        u->cgroup_subtree_mask_valid = true;

        if (UNIT_ISSET(u->slice)) {
                Unit *s = UNIT_DEREF(u->slice);

                if (more)
                        /* There's more set now than before. We
                         * propagate the new mask to the parent's mask
                         * (not caring if it actually was valid or
                         * not). */

                        s->cgroup_members_mask |= m;

                else
                        /* There's less set now than before (or we
                         * don't know), we need to recalculate
                         * everything, so let's invalidate the
                         * parent's members mask */

                        s->cgroup_members_mask_valid = false;

                /* And now make sure that this change also hits our
                 * grandparents */
                unit_update_cgroup_members_masks(s);
        }
}
957
efdb0237 958static const char *migrate_callback(CGroupMask mask, void *userdata) {
03b90d4b
LP
959 Unit *u = userdata;
960
961 assert(mask != 0);
962 assert(u);
963
964 while (u) {
965 if (u->cgroup_path &&
966 u->cgroup_realized &&
967 (u->cgroup_realized_mask & mask) == mask)
968 return u->cgroup_path;
969
970 u = UNIT_DEREF(u->slice);
971 }
972
973 return NULL;
974}
975
efdb0237
LP
976char *unit_default_cgroup_path(Unit *u) {
977 _cleanup_free_ char *escaped = NULL, *slice = NULL;
978 int r;
979
980 assert(u);
981
982 if (unit_has_name(u, SPECIAL_ROOT_SLICE))
983 return strdup(u->manager->cgroup_root);
984
985 if (UNIT_ISSET(u->slice) && !unit_has_name(UNIT_DEREF(u->slice), SPECIAL_ROOT_SLICE)) {
986 r = cg_slice_to_path(UNIT_DEREF(u->slice)->id, &slice);
987 if (r < 0)
988 return NULL;
989 }
990
991 escaped = cg_escape(u->id);
992 if (!escaped)
993 return NULL;
994
995 if (slice)
996 return strjoin(u->manager->cgroup_root, "/", slice, "/", escaped, NULL);
997 else
998 return strjoin(u->manager->cgroup_root, "/", escaped, NULL);
999}
1000
1001int unit_set_cgroup_path(Unit *u, const char *path) {
1002 _cleanup_free_ char *p = NULL;
1003 int r;
1004
1005 assert(u);
1006
1007 if (path) {
1008 p = strdup(path);
1009 if (!p)
1010 return -ENOMEM;
1011 } else
1012 p = NULL;
1013
1014 if (streq_ptr(u->cgroup_path, p))
1015 return 0;
1016
1017 if (p) {
1018 r = hashmap_put(u->manager->cgroup_unit, p, u);
1019 if (r < 0)
1020 return r;
1021 }
1022
1023 unit_release_cgroup(u);
1024
1025 u->cgroup_path = p;
1026 p = NULL;
1027
1028 return 1;
1029}
1030
1031int unit_watch_cgroup(Unit *u) {
ab2c3861 1032 _cleanup_free_ char *events = NULL;
efdb0237
LP
1033 int r;
1034
1035 assert(u);
1036
1037 if (!u->cgroup_path)
1038 return 0;
1039
1040 if (u->cgroup_inotify_wd >= 0)
1041 return 0;
1042
1043 /* Only applies to the unified hierarchy */
1044 r = cg_unified();
1045 if (r < 0)
1046 return log_unit_error_errno(u, r, "Failed detect wether the unified hierarchy is used: %m");
1047 if (r == 0)
1048 return 0;
1049
1050 /* Don't watch the root slice, it's pointless. */
1051 if (unit_has_name(u, SPECIAL_ROOT_SLICE))
1052 return 0;
1053
1054 r = hashmap_ensure_allocated(&u->manager->cgroup_inotify_wd_unit, &trivial_hash_ops);
1055 if (r < 0)
1056 return log_oom();
1057
ab2c3861 1058 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.events", &events);
efdb0237
LP
1059 if (r < 0)
1060 return log_oom();
1061
ab2c3861 1062 u->cgroup_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY);
efdb0237
LP
1063 if (u->cgroup_inotify_wd < 0) {
1064
1065 if (errno == ENOENT) /* If the directory is already
1066 * gone we don't need to track
1067 * it, so this is not an error */
1068 return 0;
1069
1070 return log_unit_error_errno(u, errno, "Failed to add inotify watch descriptor for control group %s: %m", u->cgroup_path);
1071 }
1072
1073 r = hashmap_put(u->manager->cgroup_inotify_wd_unit, INT_TO_PTR(u->cgroup_inotify_wd), u);
1074 if (r < 0)
1075 return log_unit_error_errno(u, r, "Failed to add inotify watch descriptor to hash map: %m");
1076
1077 return 0;
1078}
1079
1080static int unit_create_cgroup(
1081 Unit *u,
1082 CGroupMask target_mask,
1083 CGroupMask enable_mask) {
1084
0cd385d3 1085 CGroupContext *c;
bc432dc7 1086 int r;
64747e2d 1087
4ad49000 1088 assert(u);
64747e2d 1089
0cd385d3
LP
1090 c = unit_get_cgroup_context(u);
1091 if (!c)
1092 return 0;
1093
7b3fd631
LP
1094 if (!u->cgroup_path) {
1095 _cleanup_free_ char *path = NULL;
64747e2d 1096
7b3fd631
LP
1097 path = unit_default_cgroup_path(u);
1098 if (!path)
1099 return log_oom();
1100
efdb0237
LP
1101 r = unit_set_cgroup_path(u, path);
1102 if (r == -EEXIST)
1103 return log_unit_error_errno(u, r, "Control group %s exists already.", path);
1104 if (r < 0)
1105 return log_unit_error_errno(u, r, "Failed to set unit's control group path to %s: %m", path);
b58b8e11
HH
1106 }
1107
03b90d4b 1108 /* First, create our own group */
efdb0237 1109 r = cg_create_everywhere(u->manager->cgroup_supported, target_mask, u->cgroup_path);
23bbb0de 1110 if (r < 0)
efdb0237
LP
1111 return log_unit_error_errno(u, r, "Failed to create cgroup %s: %m", u->cgroup_path);
1112
1113 /* Start watching it */
1114 (void) unit_watch_cgroup(u);
1115
1116 /* Enable all controllers we need */
1117 r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path);
1118 if (r < 0)
1119 log_unit_warning_errno(u, r, "Failed to enable controllers on cgroup %s, ignoring: %m", u->cgroup_path);
03b90d4b
LP
1120
1121 /* Keep track that this is now realized */
4ad49000 1122 u->cgroup_realized = true;
efdb0237 1123 u->cgroup_realized_mask = target_mask;
ccf78df1 1124 u->cgroup_enabled_mask = enable_mask;
4ad49000 1125
0cd385d3
LP
1126 if (u->type != UNIT_SLICE && !c->delegate) {
1127
1128 /* Then, possibly move things over, but not if
1129 * subgroups may contain processes, which is the case
1130 * for slice and delegation units. */
1131 r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);
1132 if (r < 0)
efdb0237 1133 log_unit_warning_errno(u, r, "Failed to migrate cgroup from to %s, ignoring: %m", u->cgroup_path);
0cd385d3 1134 }
03b90d4b 1135
64747e2d
LP
1136 return 0;
1137}
1138
7b3fd631
LP
1139int unit_attach_pids_to_cgroup(Unit *u) {
1140 int r;
1141 assert(u);
1142
1143 r = unit_realize_cgroup(u);
1144 if (r < 0)
1145 return r;
1146
1147 r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->pids, migrate_callback, u);
1148 if (r < 0)
1149 return r;
1150
1151 return 0;
1152}
1153
ccf78df1 1154static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) {
bc432dc7
LP
1155 assert(u);
1156
ccf78df1 1157 return u->cgroup_realized && u->cgroup_realized_mask == target_mask && u->cgroup_enabled_mask == enable_mask;
6414b7c9
DS
1158}
1159
1160/* Check if necessary controllers and attributes for a unit are in place.
1161 *
1162 * If so, do nothing.
1163 * If not, create paths, move processes over, and set attributes.
1164 *
1165 * Returns 0 on success and < 0 on failure. */
db785129 1166static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
efdb0237 1167 CGroupMask target_mask, enable_mask;
6414b7c9 1168 int r;
64747e2d 1169
4ad49000 1170 assert(u);
64747e2d 1171
4ad49000 1172 if (u->in_cgroup_queue) {
71fda00f 1173 LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
1174 u->in_cgroup_queue = false;
1175 }
64747e2d 1176
efdb0237 1177 target_mask = unit_get_target_mask(u);
ccf78df1
TH
1178 enable_mask = unit_get_enable_mask(u);
1179
1180 if (unit_has_mask_realized(u, target_mask, enable_mask))
0a1eb06d 1181 return 0;
64747e2d 1182
4ad49000 1183 /* First, realize parents */
6414b7c9 1184 if (UNIT_ISSET(u->slice)) {
db785129 1185 r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
6414b7c9
DS
1186 if (r < 0)
1187 return r;
1188 }
4ad49000
LP
1189
1190 /* And then do the real work */
efdb0237 1191 r = unit_create_cgroup(u, target_mask, enable_mask);
6414b7c9
DS
1192 if (r < 0)
1193 return r;
1194
1195 /* Finally, apply the necessary attributes. */
50f48ad3 1196 cgroup_context_apply(unit_get_cgroup_context(u), target_mask, u->cgroup_path, state);
6414b7c9
DS
1197
1198 return 0;
64747e2d
LP
1199}
1200
4ad49000 1201static void unit_add_to_cgroup_queue(Unit *u) {
ecedd90f 1202
4ad49000
LP
1203 if (u->in_cgroup_queue)
1204 return;
8e274523 1205
71fda00f 1206 LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u);
4ad49000
LP
1207 u->in_cgroup_queue = true;
1208}
8c6db833 1209
4ad49000 1210unsigned manager_dispatch_cgroup_queue(Manager *m) {
db785129 1211 ManagerState state;
4ad49000 1212 unsigned n = 0;
db785129 1213 Unit *i;
6414b7c9 1214 int r;
ecedd90f 1215
db785129
LP
1216 state = manager_state(m);
1217
4ad49000
LP
1218 while ((i = m->cgroup_queue)) {
1219 assert(i->in_cgroup_queue);
ecedd90f 1220
db785129 1221 r = unit_realize_cgroup_now(i, state);
6414b7c9 1222 if (r < 0)
efdb0237 1223 log_warning_errno(r, "Failed to realize cgroups for queued unit %s, ignoring: %m", i->id);
0a1eb06d 1224
4ad49000
LP
1225 n++;
1226 }
ecedd90f 1227
4ad49000 1228 return n;
8e274523
LP
1229}
1230
4ad49000
LP
1231static void unit_queue_siblings(Unit *u) {
1232 Unit *slice;
ca949c9d 1233
4ad49000
LP
1234 /* This adds the siblings of the specified unit and the
1235 * siblings of all parent units to the cgroup queue. (But
1236 * neither the specified unit itself nor the parents.) */
1237
1238 while ((slice = UNIT_DEREF(u->slice))) {
1239 Iterator i;
1240 Unit *m;
8f53a7b8 1241
4ad49000
LP
1242 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
1243 if (m == u)
1244 continue;
8e274523 1245
6414b7c9
DS
1246 /* Skip units that have a dependency on the slice
1247 * but aren't actually in it. */
4ad49000 1248 if (UNIT_DEREF(m->slice) != slice)
50159e6a 1249 continue;
8e274523 1250
6414b7c9
DS
1251 /* No point in doing cgroup application for units
1252 * without active processes. */
1253 if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
1254 continue;
1255
1256 /* If the unit doesn't need any new controllers
1257 * and has current ones realized, it doesn't need
1258 * any changes. */
ccf78df1 1259 if (unit_has_mask_realized(m, unit_get_target_mask(m), unit_get_enable_mask(m)))
6414b7c9
DS
1260 continue;
1261
4ad49000 1262 unit_add_to_cgroup_queue(m);
50159e6a
LP
1263 }
1264
4ad49000 1265 u = slice;
8e274523 1266 }
4ad49000
LP
1267}
1268
0a1eb06d 1269int unit_realize_cgroup(Unit *u) {
4ad49000
LP
1270 assert(u);
1271
35b7ff80 1272 if (!UNIT_HAS_CGROUP_CONTEXT(u))
0a1eb06d 1273 return 0;
8e274523 1274
4ad49000
LP
1275 /* So, here's the deal: when realizing the cgroups for this
1276 * unit, we need to first create all parents, but there's more
1277 * actually: for the weight-based controllers we also need to
1278 * make sure that all our siblings (i.e. units that are in the
73e231ab 1279 * same slice as we are) have cgroups, too. Otherwise, things
4ad49000
LP
1280 * would become very uneven as each of their processes would
1281 * get as much resources as all our group together. This call
1282 * will synchronously create the parent cgroups, but will
1283 * defer work on the siblings to the next event loop
1284 * iteration. */
ca949c9d 1285
4ad49000
LP
1286 /* Add all sibling slices to the cgroup queue. */
1287 unit_queue_siblings(u);
1288
6414b7c9 1289 /* And realize this one now (and apply the values) */
db785129 1290 return unit_realize_cgroup_now(u, manager_state(u->manager));
8e274523
LP
1291}
1292
efdb0237
LP
1293void unit_release_cgroup(Unit *u) {
1294 assert(u);
1295
1296 /* Forgets all cgroup details for this cgroup */
1297
1298 if (u->cgroup_path) {
1299 (void) hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
1300 u->cgroup_path = mfree(u->cgroup_path);
1301 }
1302
1303 if (u->cgroup_inotify_wd >= 0) {
1304 if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_inotify_wd) < 0)
1305 log_unit_debug_errno(u, errno, "Failed to remove cgroup inotify watch %i for %s, ignoring", u->cgroup_inotify_wd, u->id);
1306
1307 (void) hashmap_remove(u->manager->cgroup_inotify_wd_unit, INT_TO_PTR(u->cgroup_inotify_wd));
1308 u->cgroup_inotify_wd = -1;
1309 }
1310}
1311
1312void unit_prune_cgroup(Unit *u) {
8e274523 1313 int r;
efdb0237 1314 bool is_root_slice;
8e274523 1315
4ad49000 1316 assert(u);
8e274523 1317
efdb0237
LP
1318 /* Removes the cgroup, if empty and possible, and stops watching it. */
1319
4ad49000
LP
1320 if (!u->cgroup_path)
1321 return;
8e274523 1322
efdb0237
LP
1323 is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
1324
1325 r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
dab5bf85 1326 if (r < 0) {
efdb0237 1327 log_debug_errno(r, "Failed to destroy cgroup %s, ignoring: %m", u->cgroup_path);
dab5bf85
RL
1328 return;
1329 }
8e274523 1330
efdb0237
LP
1331 if (is_root_slice)
1332 return;
1333
1334 unit_release_cgroup(u);
0a1eb06d 1335
4ad49000 1336 u->cgroup_realized = false;
bc432dc7 1337 u->cgroup_realized_mask = 0;
ccf78df1 1338 u->cgroup_enabled_mask = 0;
8e274523
LP
1339}
1340
efdb0237 1341int unit_search_main_pid(Unit *u, pid_t *ret) {
4ad49000
LP
1342 _cleanup_fclose_ FILE *f = NULL;
1343 pid_t pid = 0, npid, mypid;
efdb0237 1344 int r;
4ad49000
LP
1345
1346 assert(u);
efdb0237 1347 assert(ret);
4ad49000
LP
1348
1349 if (!u->cgroup_path)
efdb0237 1350 return -ENXIO;
4ad49000 1351
efdb0237
LP
1352 r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f);
1353 if (r < 0)
1354 return r;
4ad49000
LP
1355
1356 mypid = getpid();
1357 while (cg_read_pid(f, &npid) > 0) {
1358 pid_t ppid;
1359
1360 if (npid == pid)
1361 continue;
8e274523 1362
4ad49000 1363 /* Ignore processes that aren't our kids */
6bc73acb 1364 if (get_process_ppid(npid, &ppid) >= 0 && ppid != mypid)
4ad49000 1365 continue;
8e274523 1366
efdb0237 1367 if (pid != 0)
4ad49000
LP
1368 /* Dang, there's more than one daemonized PID
1369 in this group, so we don't know what process
1370 is the main process. */
efdb0237
LP
1371
1372 return -ENODATA;
8e274523 1373
4ad49000 1374 pid = npid;
8e274523
LP
1375 }
1376
efdb0237
LP
1377 *ret = pid;
1378 return 0;
1379}
1380
1381static int unit_watch_pids_in_path(Unit *u, const char *path) {
b3c5bad3 1382 _cleanup_closedir_ DIR *d = NULL;
efdb0237
LP
1383 _cleanup_fclose_ FILE *f = NULL;
1384 int ret = 0, r;
1385
1386 assert(u);
1387 assert(path);
1388
1389 r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f);
1390 if (r < 0)
1391 ret = r;
1392 else {
1393 pid_t pid;
1394
1395 while ((r = cg_read_pid(f, &pid)) > 0) {
1396 r = unit_watch_pid(u, pid);
1397 if (r < 0 && ret >= 0)
1398 ret = r;
1399 }
1400
1401 if (r < 0 && ret >= 0)
1402 ret = r;
1403 }
1404
1405 r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
1406 if (r < 0) {
1407 if (ret >= 0)
1408 ret = r;
1409 } else {
1410 char *fn;
1411
1412 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1413 _cleanup_free_ char *p = NULL;
1414
1415 p = strjoin(path, "/", fn, NULL);
1416 free(fn);
1417
1418 if (!p)
1419 return -ENOMEM;
1420
1421 r = unit_watch_pids_in_path(u, p);
1422 if (r < 0 && ret >= 0)
1423 ret = r;
1424 }
1425
1426 if (r < 0 && ret >= 0)
1427 ret = r;
1428 }
1429
1430 return ret;
1431}
1432
1433int unit_watch_all_pids(Unit *u) {
1434 assert(u);
1435
1436 /* Adds all PIDs from our cgroup to the set of PIDs we
1437 * watch. This is a fallback logic for cases where we do not
1438 * get reliable cgroup empty notifications: we try to use
1439 * SIGCHLD as replacement. */
1440
1441 if (!u->cgroup_path)
1442 return -ENOENT;
1443
1444 if (cg_unified() > 0) /* On unified we can use proper notifications */
1445 return 0;
1446
1447 return unit_watch_pids_in_path(u, u->cgroup_path);
1448}
1449
1450int unit_notify_cgroup_empty(Unit *u) {
1451 int r;
1452
1453 assert(u);
1454
1455 if (!u->cgroup_path)
1456 return 0;
1457
1458 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
1459 if (r <= 0)
1460 return r;
1461
1462 unit_add_to_gc_queue(u);
1463
1464 if (UNIT_VTABLE(u)->notify_cgroup_empty)
1465 UNIT_VTABLE(u)->notify_cgroup_empty(u);
1466
1467 return 0;
1468}
1469
1470static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1471 Manager *m = userdata;
1472
1473 assert(s);
1474 assert(fd >= 0);
1475 assert(m);
1476
1477 for (;;) {
1478 union inotify_event_buffer buffer;
1479 struct inotify_event *e;
1480 ssize_t l;
1481
1482 l = read(fd, &buffer, sizeof(buffer));
1483 if (l < 0) {
1484 if (errno == EINTR || errno == EAGAIN)
1485 return 0;
1486
1487 return log_error_errno(errno, "Failed to read control group inotify events: %m");
1488 }
1489
1490 FOREACH_INOTIFY_EVENT(e, buffer, l) {
1491 Unit *u;
1492
1493 if (e->wd < 0)
1494 /* Queue overflow has no watch descriptor */
1495 continue;
1496
1497 if (e->mask & IN_IGNORED)
1498 /* The watch was just removed */
1499 continue;
1500
1501 u = hashmap_get(m->cgroup_inotify_wd_unit, INT_TO_PTR(e->wd));
1502 if (!u) /* Not that inotify might deliver
1503 * events for a watch even after it
1504 * was removed, because it was queued
1505 * before the removal. Let's ignore
1506 * this here safely. */
1507 continue;
1508
1509 (void) unit_notify_cgroup_empty(u);
1510 }
1511 }
8e274523
LP
1512}
1513
8e274523 1514int manager_setup_cgroup(Manager *m) {
9444b1f2 1515 _cleanup_free_ char *path = NULL;
efdb0237
LP
1516 CGroupController c;
1517 int r, unified;
1518 char *e;
8e274523
LP
1519
1520 assert(m);
1521
35d2e7ec 1522 /* 1. Determine hierarchy */
efdb0237 1523 m->cgroup_root = mfree(m->cgroup_root);
9444b1f2 1524 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
23bbb0de
MS
1525 if (r < 0)
1526 return log_error_errno(r, "Cannot determine cgroup we are running in: %m");
8e274523 1527
efdb0237
LP
1528 /* Chop off the init scope, if we are already located in it */
1529 e = endswith(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
0d8c31ff 1530
efdb0237
LP
1531 /* LEGACY: Also chop off the system slice if we are in
1532 * it. This is to support live upgrades from older systemd
1533 * versions where PID 1 was moved there. Also see
1534 * cg_get_root_path(). */
463d0d15 1535 if (!e && MANAGER_IS_SYSTEM(m)) {
9444b1f2 1536 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
15c60e99 1537 if (!e)
efdb0237 1538 e = endswith(m->cgroup_root, "/system"); /* even more legacy */
0baf24dd 1539 }
efdb0237
LP
1540 if (e)
1541 *e = 0;
7ccfb64a 1542
9444b1f2
LP
1543 /* And make sure to store away the root value without trailing
1544 * slash, even for the root dir, so that we can easily prepend
1545 * it everywhere. */
efdb0237
LP
1546 while ((e = endswith(m->cgroup_root, "/")))
1547 *e = 0;
8e274523 1548
35d2e7ec 1549 /* 2. Show data */
9444b1f2 1550 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
23bbb0de
MS
1551 if (r < 0)
1552 return log_error_errno(r, "Cannot find cgroup mount point: %m");
8e274523 1553
efdb0237
LP
1554 unified = cg_unified();
1555 if (unified < 0)
1556 return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m");
1557 if (unified > 0)
1558 log_debug("Unified cgroup hierarchy is located at %s.", path);
1559 else
1560 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
1561
0d8c31ff 1562 if (!m->test_run) {
efdb0237 1563 const char *scope_path;
c6c18be3 1564
0d8c31ff 1565 /* 3. Install agent */
efdb0237
LP
1566 if (unified) {
1567
1568 /* In the unified hierarchy we can can get
1569 * cgroup empty notifications via inotify. */
1570
1571 m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
1572 safe_close(m->cgroup_inotify_fd);
1573
1574 m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1575 if (m->cgroup_inotify_fd < 0)
1576 return log_error_errno(errno, "Failed to create control group inotify object: %m");
1577
1578 r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
1579 if (r < 0)
1580 return log_error_errno(r, "Failed to watch control group inotify object: %m");
1581
d8fdc620
LP
1582 /* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also
1583 * see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
1584 r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-5);
efdb0237
LP
1585 if (r < 0)
1586 return log_error_errno(r, "Failed to set priority of inotify event source: %m");
1587
1588 (void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");
1589
463d0d15 1590 } else if (MANAGER_IS_SYSTEM(m)) {
efdb0237
LP
1591
1592 /* On the legacy hierarchy we only get
1593 * notifications via cgroup agents. (Which
1594 * isn't really reliable, since it does not
1595 * generate events when control groups with
1596 * children run empty. */
1597
0d8c31ff
ZJS
1598 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
1599 if (r < 0)
da927ba9 1600 log_warning_errno(r, "Failed to install release agent, ignoring: %m");
0d8c31ff
ZJS
1601 else if (r > 0)
1602 log_debug("Installed release agent.");
efdb0237 1603 else if (r == 0)
0d8c31ff
ZJS
1604 log_debug("Release agent already installed.");
1605 }
8e274523 1606
efdb0237
LP
1607 /* 4. Make sure we are in the special "init.scope" unit in the root slice. */
1608 scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
1609 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
23bbb0de 1610 if (r < 0)
efdb0237
LP
1611 return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
1612
1613 /* also, move all other userspace processes remaining
1614 * in the root cgroup into that scope. */
1615 r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, false);
1616 if (r < 0)
1617 log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
c6c18be3 1618
0d8c31ff
ZJS
1619 /* 5. And pin it, so that it cannot be unmounted */
1620 safe_close(m->pin_cgroupfs_fd);
0d8c31ff 1621 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
4a62c710
MS
1622 if (m->pin_cgroupfs_fd < 0)
1623 return log_error_errno(errno, "Failed to open pin file: %m");
0d8c31ff 1624
cc98b302 1625 /* 6. Always enable hierarchical support if it exists... */
efdb0237
LP
1626 if (!unified)
1627 (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
c6c18be3
LP
1628 }
1629
0d8c31ff 1630 /* 7. Figure out which controllers are supported */
efdb0237
LP
1631 r = cg_mask_supported(&m->cgroup_supported);
1632 if (r < 0)
1633 return log_error_errno(r, "Failed to determine supported controllers: %m");
1634
1635 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++)
1636 log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & c));
9156e799 1637
a32360f1 1638 return 0;
8e274523
LP
1639}
1640
c6c18be3 1641void manager_shutdown_cgroup(Manager *m, bool delete) {
8e274523
LP
1642 assert(m);
1643
9444b1f2
LP
1644 /* We can't really delete the group, since we are in it. But
1645 * let's trim it. */
1646 if (delete && m->cgroup_root)
efdb0237
LP
1647 (void) cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
1648
1649 m->cgroup_inotify_wd_unit = hashmap_free(m->cgroup_inotify_wd_unit);
1650
1651 m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
1652 m->cgroup_inotify_fd = safe_close(m->cgroup_inotify_fd);
8e274523 1653
03e334a1 1654 m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd);
c6c18be3 1655
efdb0237 1656 m->cgroup_root = mfree(m->cgroup_root);
8e274523
LP
1657}
1658
4ad49000 1659Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
acb14d31 1660 char *p;
4ad49000 1661 Unit *u;
acb14d31
LP
1662
1663 assert(m);
1664 assert(cgroup);
acb14d31 1665
4ad49000
LP
1666 u = hashmap_get(m->cgroup_unit, cgroup);
1667 if (u)
1668 return u;
acb14d31 1669
8e70580b 1670 p = strdupa(cgroup);
acb14d31
LP
1671 for (;;) {
1672 char *e;
1673
1674 e = strrchr(p, '/');
efdb0237
LP
1675 if (!e || e == p)
1676 return hashmap_get(m->cgroup_unit, SPECIAL_ROOT_SLICE);
acb14d31
LP
1677
1678 *e = 0;
1679
4ad49000
LP
1680 u = hashmap_get(m->cgroup_unit, p);
1681 if (u)
1682 return u;
acb14d31
LP
1683 }
1684}
1685
b3ac818b 1686Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid) {
4ad49000 1687 _cleanup_free_ char *cgroup = NULL;
acb14d31 1688 int r;
8e274523 1689
8c47c732
LP
1690 assert(m);
1691
b3ac818b
LP
1692 if (pid <= 0)
1693 return NULL;
1694
1695 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
1696 if (r < 0)
1697 return NULL;
1698
1699 return manager_get_unit_by_cgroup(m, cgroup);
1700}
1701
1702Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
1703 Unit *u;
1704
1705 assert(m);
1706
efdb0237 1707 if (pid <= 0)
8c47c732
LP
1708 return NULL;
1709
efdb0237
LP
1710 if (pid == 1)
1711 return hashmap_get(m->units, SPECIAL_INIT_SCOPE);
1712
fea72cc0 1713 u = hashmap_get(m->watch_pids1, PID_TO_PTR(pid));
5fe8876b
LP
1714 if (u)
1715 return u;
1716
fea72cc0 1717 u = hashmap_get(m->watch_pids2, PID_TO_PTR(pid));
5fe8876b
LP
1718 if (u)
1719 return u;
1720
b3ac818b 1721 return manager_get_unit_by_pid_cgroup(m, pid);
6dde1f33 1722}
4fbf50b3 1723
4ad49000
LP
1724int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
1725 Unit *u;
4fbf50b3 1726
4ad49000
LP
1727 assert(m);
1728 assert(cgroup);
4fbf50b3 1729
d8fdc620
LP
1730 log_debug("Got cgroup empty notification for: %s", cgroup);
1731
4ad49000 1732 u = manager_get_unit_by_cgroup(m, cgroup);
5ad096b3
LP
1733 if (!u)
1734 return 0;
b56c28c3 1735
efdb0237 1736 return unit_notify_cgroup_empty(u);
5ad096b3
LP
1737}
1738
1739int unit_get_memory_current(Unit *u, uint64_t *ret) {
1740 _cleanup_free_ char *v = NULL;
1741 int r;
1742
1743 assert(u);
1744 assert(ret);
1745
1746 if (!u->cgroup_path)
1747 return -ENODATA;
1748
efdb0237 1749 if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
5ad096b3
LP
1750 return -ENODATA;
1751
efdb0237
LP
1752 if (cg_unified() <= 0)
1753 r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v);
1754 else
1755 r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v);
5ad096b3
LP
1756 if (r == -ENOENT)
1757 return -ENODATA;
1758 if (r < 0)
1759 return r;
1760
1761 return safe_atou64(v, ret);
1762}
1763
03a7b521
LP
1764int unit_get_tasks_current(Unit *u, uint64_t *ret) {
1765 _cleanup_free_ char *v = NULL;
1766 int r;
1767
1768 assert(u);
1769 assert(ret);
1770
1771 if (!u->cgroup_path)
1772 return -ENODATA;
1773
1774 if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0)
1775 return -ENODATA;
1776
1777 r = cg_get_attribute("pids", u->cgroup_path, "pids.current", &v);
1778 if (r == -ENOENT)
1779 return -ENODATA;
1780 if (r < 0)
1781 return r;
1782
1783 return safe_atou64(v, ret);
1784}
1785
5ad096b3
LP
1786static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
1787 _cleanup_free_ char *v = NULL;
1788 uint64_t ns;
1789 int r;
1790
1791 assert(u);
1792 assert(ret);
1793
1794 if (!u->cgroup_path)
1795 return -ENODATA;
1796
efdb0237 1797 if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
5ad096b3
LP
1798 return -ENODATA;
1799
1800 r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
1801 if (r == -ENOENT)
1802 return -ENODATA;
1803 if (r < 0)
1804 return r;
1805
1806 r = safe_atou64(v, &ns);
1807 if (r < 0)
1808 return r;
1809
1810 *ret = ns;
1811 return 0;
1812}
1813
1814int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
1815 nsec_t ns;
1816 int r;
1817
1818 r = unit_get_cpu_usage_raw(u, &ns);
1819 if (r < 0)
1820 return r;
1821
1822 if (ns > u->cpuacct_usage_base)
1823 ns -= u->cpuacct_usage_base;
1824 else
1825 ns = 0;
1826
1827 *ret = ns;
1828 return 0;
1829}
1830
1831int unit_reset_cpu_usage(Unit *u) {
1832 nsec_t ns;
1833 int r;
1834
1835 assert(u);
1836
1837 r = unit_get_cpu_usage_raw(u, &ns);
1838 if (r < 0) {
1839 u->cpuacct_usage_base = 0;
1840 return r;
b56c28c3 1841 }
2633eb83 1842
5ad096b3 1843 u->cpuacct_usage_base = ns;
4ad49000 1844 return 0;
4fbf50b3
LP
1845}
1846
e9db43d5
LP
1847bool unit_cgroup_delegate(Unit *u) {
1848 CGroupContext *c;
1849
1850 assert(u);
1851
1852 c = unit_get_cgroup_context(u);
1853 if (!c)
1854 return false;
1855
1856 return c->delegate;
1857}
1858
e7ab4d1a
LP
1859void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
1860 assert(u);
1861
1862 if (!UNIT_HAS_CGROUP_CONTEXT(u))
1863 return;
1864
1865 if (m == 0)
1866 return;
1867
538b4852
TH
1868 /* always invalidate compat pairs together */
1869 if (m & (CGROUP_MASK_IO | CGROUP_MASK_BLKIO))
1870 m |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
1871
e7ab4d1a
LP
1872 if ((u->cgroup_realized_mask & m) == 0)
1873 return;
1874
1875 u->cgroup_realized_mask &= ~m;
1876 unit_add_to_cgroup_queue(u);
1877}
1878
1879void manager_invalidate_startup_units(Manager *m) {
1880 Iterator i;
1881 Unit *u;
1882
1883 assert(m);
1884
1885 SET_FOREACH(u, m->startup_units, i)
13c31542 1886 unit_invalidate_cgroup(u, CGROUP_MASK_CPU|CGROUP_MASK_IO|CGROUP_MASK_BLKIO);
e7ab4d1a
LP
1887}
1888
/* Human-readable names for the device access policies, consumed by the
 * generated cgroup_device_policy_to_string()/_from_string() helpers. */
static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
        [CGROUP_AUTO] = "auto",
        [CGROUP_CLOSED] = "closed",
        [CGROUP_STRICT] = "strict",
};

DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);