]>
Commit | Line | Data |
---|---|---|
d6c9574f | 1 | /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ |
8e274523 LP |
2 | |
3 | /*** | |
4 | This file is part of systemd. | |
5 | ||
4ad49000 | 6 | Copyright 2013 Lennart Poettering |
8e274523 LP |
7 | |
8 | systemd is free software; you can redistribute it and/or modify it | |
5430f7f2 LP |
9 | under the terms of the GNU Lesser General Public License as published by |
10 | the Free Software Foundation; either version 2.1 of the License, or | |
8e274523 LP |
11 | (at your option) any later version. |
12 | ||
13 | systemd is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
5430f7f2 | 16 | Lesser General Public License for more details. |
8e274523 | 17 | |
5430f7f2 | 18 | You should have received a copy of the GNU Lesser General Public License |
8e274523 LP |
19 | along with systemd; If not, see <http://www.gnu.org/licenses/>. |
20 | ***/ | |
21 | ||
c6c18be3 | 22 | #include <fcntl.h> |
e41969e3 | 23 | #include <fnmatch.h> |
8c6db833 | 24 | |
9eb977db | 25 | #include "path-util.h" |
9444b1f2 | 26 | #include "special.h" |
4ad49000 LP |
27 | #include "cgroup-util.h" |
28 | #include "cgroup.h" | |
8e274523 | 29 | |
9a054909 LP |
30 | #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) |
31 | ||
4ad49000 LP |
32 | void cgroup_context_init(CGroupContext *c) { |
33 | assert(c); | |
34 | ||
35 | /* Initialize everything to the kernel defaults, assuming the | |
36 | * structure is preinitialized to 0 */ | |
37 | ||
db785129 LP |
38 | c->cpu_shares = (unsigned long) -1; |
39 | c->startup_cpu_shares = (unsigned long) -1; | |
ddca82ac | 40 | c->memory_limit = (uint64_t) -1; |
db785129 LP |
41 | c->blockio_weight = (unsigned long) -1; |
42 | c->startup_blockio_weight = (unsigned long) -1; | |
b2f8b02e | 43 | |
3a43da28 | 44 | c->cpu_quota_per_sec_usec = USEC_INFINITY; |
4ad49000 | 45 | } |
8e274523 | 46 | |
4ad49000 LP |
47 | void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) { |
48 | assert(c); | |
49 | assert(a); | |
50 | ||
71fda00f | 51 | LIST_REMOVE(device_allow, c->device_allow, a); |
4ad49000 LP |
52 | free(a->path); |
53 | free(a); | |
54 | } | |
55 | ||
56 | void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) { | |
57 | assert(c); | |
58 | assert(w); | |
59 | ||
71fda00f | 60 | LIST_REMOVE(device_weights, c->blockio_device_weights, w); |
4ad49000 LP |
61 | free(w->path); |
62 | free(w); | |
63 | } | |
64 | ||
65 | void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) { | |
66 | assert(c); | |
8e274523 | 67 | assert(b); |
8e274523 | 68 | |
71fda00f | 69 | LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b); |
4ad49000 LP |
70 | free(b->path); |
71 | free(b); | |
72 | } | |
73 | ||
74 | void cgroup_context_done(CGroupContext *c) { | |
75 | assert(c); | |
76 | ||
77 | while (c->blockio_device_weights) | |
78 | cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights); | |
79 | ||
80 | while (c->blockio_device_bandwidths) | |
81 | cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths); | |
82 | ||
83 | while (c->device_allow) | |
84 | cgroup_context_free_device_allow(c, c->device_allow); | |
85 | } | |
86 | ||
87 | void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { | |
88 | CGroupBlockIODeviceBandwidth *b; | |
89 | CGroupBlockIODeviceWeight *w; | |
90 | CGroupDeviceAllow *a; | |
9a054909 | 91 | char u[FORMAT_TIMESPAN_MAX]; |
4ad49000 LP |
92 | |
93 | assert(c); | |
94 | assert(f); | |
95 | ||
96 | prefix = strempty(prefix); | |
97 | ||
98 | fprintf(f, | |
99 | "%sCPUAccounting=%s\n" | |
100 | "%sBlockIOAccounting=%s\n" | |
101 | "%sMemoryAccounting=%s\n" | |
102 | "%sCPUShares=%lu\n" | |
95ae05c0 | 103 | "%sStartupCPUShares=%lu\n" |
b2f8b02e | 104 | "%sCPUQuotaPerSecSec=%s\n" |
112a7f46 | 105 | "%sBlockIOWeight=%lu\n" |
95ae05c0 | 106 | "%sStartupBlockIOWeight=%lu\n" |
4ad49000 | 107 | "%sMemoryLimit=%" PRIu64 "\n" |
a931ad47 LP |
108 | "%sDevicePolicy=%s\n" |
109 | "%sDelegate=%s\n", | |
4ad49000 LP |
110 | prefix, yes_no(c->cpu_accounting), |
111 | prefix, yes_no(c->blockio_accounting), | |
112 | prefix, yes_no(c->memory_accounting), | |
113 | prefix, c->cpu_shares, | |
95ae05c0 | 114 | prefix, c->startup_cpu_shares, |
b1d6dcf5 | 115 | prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1), |
4ad49000 | 116 | prefix, c->blockio_weight, |
95ae05c0 | 117 | prefix, c->startup_blockio_weight, |
4ad49000 | 118 | prefix, c->memory_limit, |
a931ad47 LP |
119 | prefix, cgroup_device_policy_to_string(c->device_policy), |
120 | prefix, yes_no(c->delegate)); | |
4ad49000 LP |
121 | |
122 | LIST_FOREACH(device_allow, a, c->device_allow) | |
123 | fprintf(f, | |
124 | "%sDeviceAllow=%s %s%s%s\n", | |
125 | prefix, | |
126 | a->path, | |
127 | a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : ""); | |
128 | ||
129 | LIST_FOREACH(device_weights, w, c->blockio_device_weights) | |
130 | fprintf(f, | |
8e7076ca | 131 | "%sBlockIODeviceWeight=%s %lu", |
4ad49000 LP |
132 | prefix, |
133 | w->path, | |
134 | w->weight); | |
135 | ||
136 | LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { | |
137 | char buf[FORMAT_BYTES_MAX]; | |
138 | ||
139 | fprintf(f, | |
140 | "%s%s=%s %s\n", | |
141 | prefix, | |
142 | b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth", | |
143 | b->path, | |
144 | format_bytes(buf, sizeof(buf), b->bandwidth)); | |
145 | } | |
146 | } | |
147 | ||
/* Resolves path p to the block device number that block IO settings for
 * it should be applied to, and stores it in *dev.
 *
 * If p is a block device node, its own device number is used. Otherwise
 * the device the file resides on is used, and block_get_whole_disk() is
 * asked to replace a partition by its originating disk.
 *
 * Returns 0 on success, the result of log_warning_errno() if stat()
 * fails, or -ENODEV if no backing block device can be determined. */
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;

        assert(p);
        assert(dev);

        if (stat(p, &st) < 0)
                return log_warning_errno(errno, "Couldn't stat device %s: %m", p);

        /* A device node: take the device it refers to */
        if (S_ISBLK(st.st_mode)) {
                *dev = st.st_rdev;
                return 0;
        }

        /* Major 0 means there is no usable backing block device */
        if (major(st.st_dev) == 0) {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        /* A regular file or directory: start from the block device it is
         * stored on ... */
        *dev = st.st_dev;

        /* ... and, if that is a partition, try to move up to the whole
         * disk. The result is deliberately not checked. */
        block_get_whole_disk(*dev, dev);

        return 0;
}
176 | ||
4ad49000 LP |
177 | static int whitelist_device(const char *path, const char *node, const char *acc) { |
178 | char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4]; | |
179 | struct stat st; | |
8c6db833 | 180 | int r; |
8e274523 | 181 | |
4ad49000 LP |
182 | assert(path); |
183 | assert(acc); | |
8e274523 | 184 | |
4ad49000 LP |
185 | if (stat(node, &st) < 0) { |
186 | log_warning("Couldn't stat device %s", node); | |
187 | return -errno; | |
188 | } | |
189 | ||
190 | if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { | |
191 | log_warning("%s is not a device.", node); | |
192 | return -ENODEV; | |
193 | } | |
194 | ||
195 | sprintf(buf, | |
196 | "%c %u:%u %s", | |
197 | S_ISCHR(st.st_mode) ? 'c' : 'b', | |
198 | major(st.st_rdev), minor(st.st_rdev), | |
199 | acc); | |
200 | ||
201 | r = cg_set_attribute("devices", path, "devices.allow", buf); | |
1aeab12b | 202 | if (r < 0) |
714e2e1d LP |
203 | log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL) ? LOG_DEBUG : LOG_WARNING, r, |
204 | "Failed to set devices.allow on %s: %m", path); | |
4ad49000 LP |
205 | |
206 | return r; | |
8e274523 LP |
207 | } |
208 | ||
90060676 LP |
209 | static int whitelist_major(const char *path, const char *name, char type, const char *acc) { |
210 | _cleanup_fclose_ FILE *f = NULL; | |
211 | char line[LINE_MAX]; | |
212 | bool good = false; | |
213 | int r; | |
214 | ||
215 | assert(path); | |
216 | assert(acc); | |
217 | assert(type == 'b' || type == 'c'); | |
218 | ||
219 | f = fopen("/proc/devices", "re"); | |
4a62c710 MS |
220 | if (!f) |
221 | return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s (%c): %m", name, type); | |
90060676 LP |
222 | |
223 | FOREACH_LINE(line, f, goto fail) { | |
224 | char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w; | |
225 | unsigned maj; | |
226 | ||
227 | truncate_nl(line); | |
228 | ||
229 | if (type == 'c' && streq(line, "Character devices:")) { | |
230 | good = true; | |
231 | continue; | |
232 | } | |
233 | ||
234 | if (type == 'b' && streq(line, "Block devices:")) { | |
235 | good = true; | |
236 | continue; | |
237 | } | |
238 | ||
239 | if (isempty(line)) { | |
240 | good = false; | |
241 | continue; | |
242 | } | |
243 | ||
244 | if (!good) | |
245 | continue; | |
246 | ||
247 | p = strstrip(line); | |
248 | ||
249 | w = strpbrk(p, WHITESPACE); | |
250 | if (!w) | |
251 | continue; | |
252 | *w = 0; | |
253 | ||
254 | r = safe_atou(p, &maj); | |
255 | if (r < 0) | |
256 | continue; | |
257 | if (maj <= 0) | |
258 | continue; | |
259 | ||
260 | w++; | |
261 | w += strspn(w, WHITESPACE); | |
e41969e3 LP |
262 | |
263 | if (fnmatch(name, w, 0) != 0) | |
90060676 LP |
264 | continue; |
265 | ||
266 | sprintf(buf, | |
267 | "%c %u:* %s", | |
268 | type, | |
269 | maj, | |
270 | acc); | |
271 | ||
272 | r = cg_set_attribute("devices", path, "devices.allow", buf); | |
1aeab12b | 273 | if (r < 0) |
714e2e1d LP |
274 | log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL) ? LOG_DEBUG : LOG_WARNING, r, |
275 | "Failed to set devices.allow on %s: %m", path); | |
90060676 LP |
276 | } |
277 | ||
278 | return 0; | |
279 | ||
280 | fail: | |
56f64d95 | 281 | log_warning_errno(errno, "Failed to read /proc/devices: %m"); |
90060676 LP |
282 | return -errno; |
283 | } | |
284 | ||
db785129 | 285 | void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path, ManagerState state) { |
01efdf13 | 286 | bool is_root; |
4ad49000 LP |
287 | int r; |
288 | ||
289 | assert(c); | |
290 | assert(path); | |
8e274523 | 291 | |
4ad49000 LP |
292 | if (mask == 0) |
293 | return; | |
8e274523 | 294 | |
71c26873 | 295 | /* Some cgroup attributes are not supported on the root cgroup, |
01efdf13 LP |
296 | * hence silently ignore */ |
297 | is_root = isempty(path) || path_equal(path, "/"); | |
6da13913 ZJS |
298 | if (is_root) |
299 | /* Make sure we don't try to display messages with an empty path. */ | |
300 | path = "/"; | |
01efdf13 | 301 | |
714e2e1d LP |
302 | /* We generally ignore errors caused by read-only mounted |
303 | * cgroup trees (assuming we are running in a container then), | |
304 | * and missing cgroups, i.e. EROFS and ENOENT. */ | |
305 | ||
01efdf13 | 306 | if ((mask & CGROUP_CPU) && !is_root) { |
b2f8b02e | 307 | char buf[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t)) + 1]; |
8e274523 | 308 | |
db785129 | 309 | sprintf(buf, "%lu\n", |
d81afec1 | 310 | IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != (unsigned long) -1 ? c->startup_cpu_shares : |
db785129 | 311 | c->cpu_shares != (unsigned long) -1 ? c->cpu_shares : 1024); |
4ad49000 | 312 | r = cg_set_attribute("cpu", path, "cpu.shares", buf); |
1aeab12b | 313 | if (r < 0) |
714e2e1d LP |
314 | log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r, |
315 | "Failed to set cpu.shares on %s: %m", path); | |
b2f8b02e | 316 | |
9a054909 | 317 | sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); |
b2f8b02e | 318 | r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf); |
1aeab12b | 319 | if (r < 0) |
714e2e1d LP |
320 | log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r, |
321 | "Failed to set cpu.cfs_period_us on %s: %m", path); | |
b2f8b02e | 322 | |
3a43da28 | 323 | if (c->cpu_quota_per_sec_usec != USEC_INFINITY) { |
9a054909 | 324 | sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC); |
b2f8b02e LP |
325 | r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf); |
326 | } else | |
327 | r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1"); | |
1aeab12b | 328 | if (r < 0) |
714e2e1d LP |
329 | log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r, |
330 | "Failed to set cpu.cfs_quota_us on %s: %m", path); | |
4ad49000 LP |
331 | } |
332 | ||
333 | if (mask & CGROUP_BLKIO) { | |
334 | char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1, | |
335 | DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1, | |
336 | DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)]; | |
337 | CGroupBlockIODeviceWeight *w; | |
338 | CGroupBlockIODeviceBandwidth *b; | |
339 | ||
01efdf13 | 340 | if (!is_root) { |
d81afec1 | 341 | sprintf(buf, "%lu\n", IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_blockio_weight != (unsigned long) -1 ? c->startup_blockio_weight : |
db785129 | 342 | c->blockio_weight != (unsigned long) -1 ? c->blockio_weight : 1000); |
01efdf13 | 343 | r = cg_set_attribute("blkio", path, "blkio.weight", buf); |
1aeab12b | 344 | if (r < 0) |
714e2e1d LP |
345 | log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r, |
346 | "Failed to set blkio.weight on %s: %m", path); | |
4ad49000 | 347 | |
01efdf13 LP |
348 | /* FIXME: no way to reset this list */ |
349 | LIST_FOREACH(device_weights, w, c->blockio_device_weights) { | |
350 | dev_t dev; | |
4ad49000 | 351 | |
01efdf13 LP |
352 | r = lookup_blkio_device(w->path, &dev); |
353 | if (r < 0) | |
354 | continue; | |
8e274523 | 355 | |
01efdf13 LP |
356 | sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight); |
357 | r = cg_set_attribute("blkio", path, "blkio.weight_device", buf); | |
1aeab12b | 358 | if (r < 0) |
714e2e1d LP |
359 | log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r, |
360 | "Failed to set blkio.weight_device on %s: %m", path); | |
01efdf13 | 361 | } |
4ad49000 LP |
362 | } |
363 | ||
364 | /* FIXME: no way to reset this list */ | |
365 | LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { | |
366 | const char *a; | |
367 | dev_t dev; | |
368 | ||
369 | r = lookup_blkio_device(b->path, &dev); | |
370 | if (r < 0) | |
371 | continue; | |
372 | ||
373 | a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device"; | |
374 | ||
375 | sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth); | |
376 | r = cg_set_attribute("blkio", path, a, buf); | |
1aeab12b | 377 | if (r < 0) |
714e2e1d LP |
378 | log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r, |
379 | "Failed to set %s on %s: %m", a, path); | |
d686d8a9 | 380 | } |
8e274523 LP |
381 | } |
382 | ||
a3bd89ea | 383 | if ((mask & CGROUP_MEMORY) && !is_root) { |
6a94f2e9 | 384 | if (c->memory_limit != (uint64_t) -1) { |
e58cec11 LP |
385 | char buf[DECIMAL_STR_MAX(uint64_t) + 1]; |
386 | ||
6a94f2e9 G |
387 | sprintf(buf, "%" PRIu64 "\n", c->memory_limit); |
388 | r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); | |
389 | } else | |
390 | r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1"); | |
8e274523 | 391 | |
1aeab12b | 392 | if (r < 0) |
714e2e1d LP |
393 | log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r, |
394 | "Failed to set memory.limit_in_bytes on %s: %m", path); | |
4ad49000 | 395 | } |
8e274523 | 396 | |
01efdf13 | 397 | if ((mask & CGROUP_DEVICE) && !is_root) { |
4ad49000 | 398 | CGroupDeviceAllow *a; |
8e274523 | 399 | |
714e2e1d LP |
400 | /* Changing the devices list of a populated cgroup |
401 | * might result in EINVAL, hence ignore EINVAL | |
402 | * here. */ | |
403 | ||
4ad49000 LP |
404 | if (c->device_allow || c->device_policy != CGROUP_AUTO) |
405 | r = cg_set_attribute("devices", path, "devices.deny", "a"); | |
406 | else | |
407 | r = cg_set_attribute("devices", path, "devices.allow", "a"); | |
1aeab12b | 408 | if (r < 0) |
714e2e1d LP |
409 | log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL) ? LOG_DEBUG : LOG_WARNING, r, |
410 | "Failed to reset devices.list on %s: %m", path); | |
fb385181 | 411 | |
4ad49000 LP |
412 | if (c->device_policy == CGROUP_CLOSED || |
413 | (c->device_policy == CGROUP_AUTO && c->device_allow)) { | |
414 | static const char auto_devices[] = | |
7d711efb LP |
415 | "/dev/null\0" "rwm\0" |
416 | "/dev/zero\0" "rwm\0" | |
417 | "/dev/full\0" "rwm\0" | |
418 | "/dev/random\0" "rwm\0" | |
419 | "/dev/urandom\0" "rwm\0" | |
420 | "/dev/tty\0" "rwm\0" | |
421 | "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */ | |
4ad49000 LP |
422 | |
423 | const char *x, *y; | |
424 | ||
425 | NULSTR_FOREACH_PAIR(x, y, auto_devices) | |
426 | whitelist_device(path, x, y); | |
7d711efb LP |
427 | |
428 | whitelist_major(path, "pts", 'c', "rw"); | |
429 | whitelist_major(path, "kdbus", 'c', "rw"); | |
430 | whitelist_major(path, "kdbus/*", 'c', "rw"); | |
4ad49000 LP |
431 | } |
432 | ||
433 | LIST_FOREACH(device_allow, a, c->device_allow) { | |
434 | char acc[4]; | |
435 | unsigned k = 0; | |
436 | ||
437 | if (a->r) | |
438 | acc[k++] = 'r'; | |
439 | if (a->w) | |
440 | acc[k++] = 'w'; | |
441 | if (a->m) | |
442 | acc[k++] = 'm'; | |
fb385181 | 443 | |
4ad49000 LP |
444 | if (k == 0) |
445 | continue; | |
fb385181 | 446 | |
4ad49000 | 447 | acc[k++] = 0; |
90060676 LP |
448 | |
449 | if (startswith(a->path, "/dev/")) | |
450 | whitelist_device(path, a->path, acc); | |
451 | else if (startswith(a->path, "block-")) | |
452 | whitelist_major(path, a->path + 6, 'b', acc); | |
453 | else if (startswith(a->path, "char-")) | |
454 | whitelist_major(path, a->path + 5, 'c', acc); | |
455 | else | |
456 | log_debug("Ignoring device %s while writing cgroup attribute.", a->path); | |
4ad49000 LP |
457 | } |
458 | } | |
fb385181 LP |
459 | } |
460 | ||
db785129 | 461 | CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) { |
4ad49000 | 462 | CGroupControllerMask mask = 0; |
8e274523 | 463 | |
4ad49000 | 464 | /* Figure out which controllers we need */ |
8e274523 | 465 | |
b2f8b02e | 466 | if (c->cpu_accounting || |
db785129 LP |
467 | c->cpu_shares != (unsigned long) -1 || |
468 | c->startup_cpu_shares != (unsigned long) -1 || | |
3a43da28 | 469 | c->cpu_quota_per_sec_usec != USEC_INFINITY) |
4ad49000 | 470 | mask |= CGROUP_CPUACCT | CGROUP_CPU; |
ecedd90f | 471 | |
4ad49000 | 472 | if (c->blockio_accounting || |
db785129 LP |
473 | c->blockio_weight != (unsigned long) -1 || |
474 | c->startup_blockio_weight != (unsigned long) -1 || | |
4ad49000 | 475 | c->blockio_device_weights || |
db785129 | 476 | c->blockio_device_bandwidths) |
4ad49000 | 477 | mask |= CGROUP_BLKIO; |
ecedd90f | 478 | |
4ad49000 | 479 | if (c->memory_accounting || |
ddca82ac | 480 | c->memory_limit != (uint64_t) -1) |
4ad49000 | 481 | mask |= CGROUP_MEMORY; |
8e274523 | 482 | |
a931ad47 LP |
483 | if (c->device_allow || |
484 | c->device_policy != CGROUP_AUTO) | |
4ad49000 LP |
485 | mask |= CGROUP_DEVICE; |
486 | ||
487 | return mask; | |
8e274523 LP |
488 | } |
489 | ||
bc432dc7 | 490 | CGroupControllerMask unit_get_cgroup_mask(Unit *u) { |
4ad49000 | 491 | CGroupContext *c; |
8e274523 | 492 | |
4ad49000 LP |
493 | c = unit_get_cgroup_context(u); |
494 | if (!c) | |
495 | return 0; | |
8e274523 | 496 | |
a931ad47 LP |
497 | /* If delegation is turned on, then turn on all cgroups, |
498 | * unless the process we fork into it is known to drop | |
499 | * privileges anyway, and shouldn't get access to the | |
500 | * controllers anyway. */ | |
501 | ||
502 | if (c->delegate) { | |
503 | ExecContext *e; | |
504 | ||
505 | e = unit_get_exec_context(u); | |
506 | if (!e || exec_context_maintains_privileges(e)) | |
507 | return _CGROUP_CONTROLLER_MASK_ALL; | |
508 | } | |
509 | ||
db785129 | 510 | return cgroup_context_get_mask(c); |
8e274523 LP |
511 | } |
512 | ||
bc432dc7 | 513 | CGroupControllerMask unit_get_members_mask(Unit *u) { |
4ad49000 | 514 | assert(u); |
bc432dc7 LP |
515 | |
516 | if (u->cgroup_members_mask_valid) | |
517 | return u->cgroup_members_mask; | |
518 | ||
519 | u->cgroup_members_mask = 0; | |
520 | ||
521 | if (u->type == UNIT_SLICE) { | |
522 | Unit *member; | |
523 | Iterator i; | |
524 | ||
525 | SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) { | |
526 | ||
527 | if (member == u) | |
528 | continue; | |
529 | ||
d4fdc205 | 530 | if (UNIT_DEREF(member->slice) != u) |
bc432dc7 LP |
531 | continue; |
532 | ||
533 | u->cgroup_members_mask |= | |
534 | unit_get_cgroup_mask(member) | | |
535 | unit_get_members_mask(member); | |
536 | } | |
537 | } | |
538 | ||
539 | u->cgroup_members_mask_valid = true; | |
6414b7c9 | 540 | return u->cgroup_members_mask; |
246aa6dd LP |
541 | } |
542 | ||
bc432dc7 | 543 | CGroupControllerMask unit_get_siblings_mask(Unit *u) { |
4ad49000 | 544 | assert(u); |
246aa6dd | 545 | |
bc432dc7 | 546 | if (UNIT_ISSET(u->slice)) |
637f421e | 547 | return unit_get_members_mask(UNIT_DEREF(u->slice)); |
4ad49000 | 548 | |
637f421e | 549 | return unit_get_cgroup_mask(u) | unit_get_members_mask(u); |
246aa6dd LP |
550 | } |
551 | ||
bc432dc7 | 552 | CGroupControllerMask unit_get_target_mask(Unit *u) { |
6414b7c9 DS |
553 | CGroupControllerMask mask; |
554 | ||
555 | mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u); | |
556 | mask &= u->manager->cgroup_supported; | |
557 | ||
558 | return mask; | |
559 | } | |
560 | ||
561 | /* Recurse from a unit up through its containing slices, propagating | |
562 | * mask bits upward. A unit is also member of itself. */ | |
bc432dc7 LP |
563 | void unit_update_cgroup_members_masks(Unit *u) { |
564 | CGroupControllerMask m; | |
565 | bool more; | |
566 | ||
567 | assert(u); | |
568 | ||
569 | /* Calculate subtree mask */ | |
570 | m = unit_get_cgroup_mask(u) | unit_get_members_mask(u); | |
571 | ||
572 | /* See if anything changed from the previous invocation. If | |
573 | * not, we're done. */ | |
574 | if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask) | |
575 | return; | |
576 | ||
577 | more = | |
578 | u->cgroup_subtree_mask_valid && | |
579 | ((m & ~u->cgroup_subtree_mask) != 0) && | |
580 | ((~m & u->cgroup_subtree_mask) == 0); | |
581 | ||
582 | u->cgroup_subtree_mask = m; | |
583 | u->cgroup_subtree_mask_valid = true; | |
584 | ||
6414b7c9 DS |
585 | if (UNIT_ISSET(u->slice)) { |
586 | Unit *s = UNIT_DEREF(u->slice); | |
bc432dc7 LP |
587 | |
588 | if (more) | |
589 | /* There's more set now than before. We | |
590 | * propagate the new mask to the parent's mask | |
591 | * (not caring if it actually was valid or | |
592 | * not). */ | |
593 | ||
594 | s->cgroup_members_mask |= m; | |
595 | ||
596 | else | |
597 | /* There's less set now than before (or we | |
598 | * don't know), we need to recalculate | |
599 | * everything, so let's invalidate the | |
600 | * parent's members mask */ | |
601 | ||
602 | s->cgroup_members_mask_valid = false; | |
603 | ||
604 | /* And now make sure that this change also hits our | |
605 | * grandparents */ | |
606 | unit_update_cgroup_members_masks(s); | |
6414b7c9 DS |
607 | } |
608 | } | |
609 | ||
03b90d4b LP |
610 | static const char *migrate_callback(CGroupControllerMask mask, void *userdata) { |
611 | Unit *u = userdata; | |
612 | ||
613 | assert(mask != 0); | |
614 | assert(u); | |
615 | ||
616 | while (u) { | |
617 | if (u->cgroup_path && | |
618 | u->cgroup_realized && | |
619 | (u->cgroup_realized_mask & mask) == mask) | |
620 | return u->cgroup_path; | |
621 | ||
622 | u = UNIT_DEREF(u->slice); | |
623 | } | |
624 | ||
625 | return NULL; | |
626 | } | |
627 | ||
4ad49000 | 628 | static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) { |
0cd385d3 | 629 | CGroupContext *c; |
bc432dc7 | 630 | int r; |
64747e2d | 631 | |
4ad49000 | 632 | assert(u); |
64747e2d | 633 | |
0cd385d3 LP |
634 | c = unit_get_cgroup_context(u); |
635 | if (!c) | |
636 | return 0; | |
637 | ||
7b3fd631 LP |
638 | if (!u->cgroup_path) { |
639 | _cleanup_free_ char *path = NULL; | |
64747e2d | 640 | |
7b3fd631 LP |
641 | path = unit_default_cgroup_path(u); |
642 | if (!path) | |
643 | return log_oom(); | |
644 | ||
645 | r = hashmap_put(u->manager->cgroup_unit, path, u); | |
646 | if (r < 0) { | |
647 | log_error(r == -EEXIST ? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path, strerror(-r)); | |
648 | return r; | |
649 | } | |
650 | if (r > 0) { | |
651 | u->cgroup_path = path; | |
652 | path = NULL; | |
653 | } | |
b58b8e11 HH |
654 | } |
655 | ||
03b90d4b LP |
656 | /* First, create our own group */ |
657 | r = cg_create_everywhere(u->manager->cgroup_supported, mask, u->cgroup_path); | |
23bbb0de MS |
658 | if (r < 0) |
659 | return log_error_errno(r, "Failed to create cgroup %s: %m", u->cgroup_path); | |
03b90d4b LP |
660 | |
661 | /* Keep track that this is now realized */ | |
4ad49000 | 662 | u->cgroup_realized = true; |
bc432dc7 | 663 | u->cgroup_realized_mask = mask; |
4ad49000 | 664 | |
0cd385d3 LP |
665 | if (u->type != UNIT_SLICE && !c->delegate) { |
666 | ||
667 | /* Then, possibly move things over, but not if | |
668 | * subgroups may contain processes, which is the case | |
669 | * for slice and delegation units. */ | |
670 | r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u); | |
671 | if (r < 0) | |
672 | log_warning_errno(r, "Failed to migrate cgroup from to %s: %m", u->cgroup_path); | |
673 | } | |
03b90d4b | 674 | |
64747e2d LP |
675 | return 0; |
676 | } | |
677 | ||
7b3fd631 LP |
678 | int unit_attach_pids_to_cgroup(Unit *u) { |
679 | int r; | |
680 | assert(u); | |
681 | ||
682 | r = unit_realize_cgroup(u); | |
683 | if (r < 0) | |
684 | return r; | |
685 | ||
686 | r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->pids, migrate_callback, u); | |
687 | if (r < 0) | |
688 | return r; | |
689 | ||
690 | return 0; | |
691 | } | |
692 | ||
6414b7c9 | 693 | static bool unit_has_mask_realized(Unit *u, CGroupControllerMask mask) { |
bc432dc7 LP |
694 | assert(u); |
695 | ||
696 | return u->cgroup_realized && u->cgroup_realized_mask == mask; | |
6414b7c9 DS |
697 | } |
698 | ||
699 | /* Check if necessary controllers and attributes for a unit are in place. | |
700 | * | |
701 | * If so, do nothing. | |
702 | * If not, create paths, move processes over, and set attributes. | |
703 | * | |
704 | * Returns 0 on success and < 0 on failure. */ | |
db785129 | 705 | static int unit_realize_cgroup_now(Unit *u, ManagerState state) { |
4ad49000 | 706 | CGroupControllerMask mask; |
6414b7c9 | 707 | int r; |
64747e2d | 708 | |
4ad49000 | 709 | assert(u); |
64747e2d | 710 | |
4ad49000 | 711 | if (u->in_cgroup_queue) { |
71fda00f | 712 | LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u); |
4ad49000 LP |
713 | u->in_cgroup_queue = false; |
714 | } | |
64747e2d | 715 | |
6414b7c9 | 716 | mask = unit_get_target_mask(u); |
64747e2d | 717 | |
6414b7c9 | 718 | if (unit_has_mask_realized(u, mask)) |
0a1eb06d | 719 | return 0; |
64747e2d | 720 | |
4ad49000 | 721 | /* First, realize parents */ |
6414b7c9 | 722 | if (UNIT_ISSET(u->slice)) { |
db785129 | 723 | r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state); |
6414b7c9 DS |
724 | if (r < 0) |
725 | return r; | |
726 | } | |
4ad49000 LP |
727 | |
728 | /* And then do the real work */ | |
6414b7c9 DS |
729 | r = unit_create_cgroups(u, mask); |
730 | if (r < 0) | |
731 | return r; | |
732 | ||
733 | /* Finally, apply the necessary attributes. */ | |
db785129 | 734 | cgroup_context_apply(unit_get_cgroup_context(u), mask, u->cgroup_path, state); |
6414b7c9 DS |
735 | |
736 | return 0; | |
64747e2d LP |
737 | } |
738 | ||
4ad49000 | 739 | static void unit_add_to_cgroup_queue(Unit *u) { |
ecedd90f | 740 | |
4ad49000 LP |
741 | if (u->in_cgroup_queue) |
742 | return; | |
8e274523 | 743 | |
71fda00f | 744 | LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u); |
4ad49000 LP |
745 | u->in_cgroup_queue = true; |
746 | } | |
8c6db833 | 747 | |
4ad49000 | 748 | unsigned manager_dispatch_cgroup_queue(Manager *m) { |
db785129 | 749 | ManagerState state; |
4ad49000 | 750 | unsigned n = 0; |
db785129 | 751 | Unit *i; |
6414b7c9 | 752 | int r; |
ecedd90f | 753 | |
db785129 LP |
754 | state = manager_state(m); |
755 | ||
4ad49000 LP |
756 | while ((i = m->cgroup_queue)) { |
757 | assert(i->in_cgroup_queue); | |
ecedd90f | 758 | |
db785129 | 759 | r = unit_realize_cgroup_now(i, state); |
6414b7c9 | 760 | if (r < 0) |
da927ba9 | 761 | log_warning_errno(r, "Failed to realize cgroups for queued unit %s: %m", i->id); |
0a1eb06d | 762 | |
4ad49000 LP |
763 | n++; |
764 | } | |
ecedd90f | 765 | |
4ad49000 | 766 | return n; |
8e274523 LP |
767 | } |
768 | ||
4ad49000 LP |
769 | static void unit_queue_siblings(Unit *u) { |
770 | Unit *slice; | |
ca949c9d | 771 | |
4ad49000 LP |
772 | /* This adds the siblings of the specified unit and the |
773 | * siblings of all parent units to the cgroup queue. (But | |
774 | * neither the specified unit itself nor the parents.) */ | |
775 | ||
776 | while ((slice = UNIT_DEREF(u->slice))) { | |
777 | Iterator i; | |
778 | Unit *m; | |
8f53a7b8 | 779 | |
4ad49000 LP |
780 | SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) { |
781 | if (m == u) | |
782 | continue; | |
8e274523 | 783 | |
6414b7c9 DS |
784 | /* Skip units that have a dependency on the slice |
785 | * but aren't actually in it. */ | |
4ad49000 | 786 | if (UNIT_DEREF(m->slice) != slice) |
50159e6a | 787 | continue; |
8e274523 | 788 | |
6414b7c9 DS |
789 | /* No point in doing cgroup application for units |
790 | * without active processes. */ | |
791 | if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m))) | |
792 | continue; | |
793 | ||
794 | /* If the unit doesn't need any new controllers | |
795 | * and has current ones realized, it doesn't need | |
796 | * any changes. */ | |
797 | if (unit_has_mask_realized(m, unit_get_target_mask(m))) | |
798 | continue; | |
799 | ||
4ad49000 | 800 | unit_add_to_cgroup_queue(m); |
50159e6a LP |
801 | } |
802 | ||
4ad49000 | 803 | u = slice; |
8e274523 | 804 | } |
4ad49000 LP |
805 | } |
806 | ||
0a1eb06d | 807 | int unit_realize_cgroup(Unit *u) { |
4ad49000 LP |
808 | CGroupContext *c; |
809 | ||
810 | assert(u); | |
811 | ||
812 | c = unit_get_cgroup_context(u); | |
813 | if (!c) | |
0a1eb06d | 814 | return 0; |
8e274523 | 815 | |
4ad49000 LP |
816 | /* So, here's the deal: when realizing the cgroups for this |
817 | * unit, we need to first create all parents, but there's more | |
818 | * actually: for the weight-based controllers we also need to | |
819 | * make sure that all our siblings (i.e. units that are in the | |
73e231ab | 820 | * same slice as we are) have cgroups, too. Otherwise, things |
4ad49000 LP |
821 | * would become very uneven as each of their processes would |
822 | * get as much resources as all our group together. This call | |
823 | * will synchronously create the parent cgroups, but will | |
824 | * defer work on the siblings to the next event loop | |
825 | * iteration. */ | |
ca949c9d | 826 | |
4ad49000 LP |
827 | /* Add all sibling slices to the cgroup queue. */ |
828 | unit_queue_siblings(u); | |
829 | ||
6414b7c9 | 830 | /* And realize this one now (and apply the values) */ |
db785129 | 831 | return unit_realize_cgroup_now(u, manager_state(u->manager)); |
8e274523 LP |
832 | } |
833 | ||
b1491eba | 834 | void unit_destroy_cgroup_if_empty(Unit *u) { |
8e274523 LP |
835 | int r; |
836 | ||
4ad49000 | 837 | assert(u); |
8e274523 | 838 | |
4ad49000 LP |
839 | if (!u->cgroup_path) |
840 | return; | |
8e274523 | 841 | |
13b84ec7 | 842 | r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE)); |
dab5bf85 | 843 | if (r < 0) { |
da927ba9 | 844 | log_debug_errno(r, "Failed to destroy cgroup %s: %m", u->cgroup_path); |
dab5bf85 RL |
845 | return; |
846 | } | |
8e274523 | 847 | |
0a1eb06d LP |
848 | hashmap_remove(u->manager->cgroup_unit, u->cgroup_path); |
849 | ||
4ad49000 LP |
850 | free(u->cgroup_path); |
851 | u->cgroup_path = NULL; | |
852 | u->cgroup_realized = false; | |
bc432dc7 | 853 | u->cgroup_realized_mask = 0; |
8e274523 LP |
854 | } |
855 | ||
4ad49000 LP |
856 | pid_t unit_search_main_pid(Unit *u) { |
857 | _cleanup_fclose_ FILE *f = NULL; | |
858 | pid_t pid = 0, npid, mypid; | |
859 | ||
860 | assert(u); | |
861 | ||
862 | if (!u->cgroup_path) | |
863 | return 0; | |
864 | ||
865 | if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0) | |
866 | return 0; | |
867 | ||
868 | mypid = getpid(); | |
869 | while (cg_read_pid(f, &npid) > 0) { | |
870 | pid_t ppid; | |
871 | ||
872 | if (npid == pid) | |
873 | continue; | |
8e274523 | 874 | |
4ad49000 LP |
875 | /* Ignore processes that aren't our kids */ |
876 | if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid) | |
877 | continue; | |
8e274523 | 878 | |
4ad49000 LP |
879 | if (pid != 0) { |
880 | /* Dang, there's more than one daemonized PID | |
881 | in this group, so we don't know what process | |
882 | is the main process. */ | |
883 | pid = 0; | |
884 | break; | |
885 | } | |
8e274523 | 886 | |
4ad49000 | 887 | pid = npid; |
8e274523 LP |
888 | } |
889 | ||
4ad49000 | 890 | return pid; |
8e274523 LP |
891 | } |
892 | ||
8e274523 | 893 | int manager_setup_cgroup(Manager *m) { |
9444b1f2 | 894 | _cleanup_free_ char *path = NULL; |
8e274523 | 895 | int r; |
8e274523 LP |
896 | |
897 | assert(m); | |
898 | ||
35d2e7ec | 899 | /* 1. Determine hierarchy */ |
9444b1f2 LP |
900 | free(m->cgroup_root); |
901 | m->cgroup_root = NULL; | |
902 | ||
903 | r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root); | |
23bbb0de MS |
904 | if (r < 0) |
905 | return log_error_errno(r, "Cannot determine cgroup we are running in: %m"); | |
8e274523 | 906 | |
15c60e99 LP |
907 | /* LEGACY: Already in /system.slice? If so, let's cut this |
908 | * off. This is to support live upgrades from older systemd | |
909 | * versions where PID 1 was moved there. */ | |
9444b1f2 | 910 | if (m->running_as == SYSTEMD_SYSTEM) { |
0d8c31ff ZJS |
911 | char *e; |
912 | ||
9444b1f2 | 913 | e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE); |
15c60e99 LP |
914 | if (!e) |
915 | e = endswith(m->cgroup_root, "/system"); | |
9444b1f2 LP |
916 | if (e) |
917 | *e = 0; | |
0baf24dd | 918 | } |
7ccfb64a | 919 | |
9444b1f2 LP |
920 | /* And make sure to store away the root value without trailing |
921 | * slash, even for the root dir, so that we can easily prepend | |
922 | * it everywhere. */ | |
923 | if (streq(m->cgroup_root, "/")) | |
924 | m->cgroup_root[0] = 0; | |
8e274523 | 925 | |
35d2e7ec | 926 | /* 2. Show data */ |
9444b1f2 | 927 | r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path); |
23bbb0de MS |
928 | if (r < 0) |
929 | return log_error_errno(r, "Cannot find cgroup mount point: %m"); | |
8e274523 | 930 | |
c6c18be3 | 931 | log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path); |
0d8c31ff | 932 | if (!m->test_run) { |
c6c18be3 | 933 | |
0d8c31ff ZJS |
934 | /* 3. Install agent */ |
935 | if (m->running_as == SYSTEMD_SYSTEM) { | |
936 | r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH); | |
937 | if (r < 0) | |
da927ba9 | 938 | log_warning_errno(r, "Failed to install release agent, ignoring: %m"); |
0d8c31ff ZJS |
939 | else if (r > 0) |
940 | log_debug("Installed release agent."); | |
941 | else | |
942 | log_debug("Release agent already installed."); | |
943 | } | |
8e274523 | 944 | |
0d8c31ff ZJS |
945 | /* 4. Make sure we are in the root cgroup */ |
946 | r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0); | |
23bbb0de MS |
947 | if (r < 0) |
948 | return log_error_errno(r, "Failed to create root cgroup hierarchy: %m"); | |
c6c18be3 | 949 | |
0d8c31ff ZJS |
950 | /* 5. And pin it, so that it cannot be unmounted */ |
951 | safe_close(m->pin_cgroupfs_fd); | |
c6c18be3 | 952 | |
0d8c31ff | 953 | m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK); |
4a62c710 MS |
954 | if (m->pin_cgroupfs_fd < 0) |
955 | return log_error_errno(errno, "Failed to open pin file: %m"); | |
0d8c31ff | 956 | |
cc98b302 | 957 | /* 6. Always enable hierarchical support if it exists... */ |
0d8c31ff | 958 | cg_set_attribute("memory", "/", "memory.use_hierarchy", "1"); |
c6c18be3 LP |
959 | } |
960 | ||
0d8c31ff | 961 | /* 7. Figure out which controllers are supported */ |
4ad49000 | 962 | m->cgroup_supported = cg_mask_supported(); |
9156e799 | 963 | |
a32360f1 | 964 | return 0; |
8e274523 LP |
965 | } |
966 | ||
c6c18be3 | 967 | void manager_shutdown_cgroup(Manager *m, bool delete) { |
8e274523 LP |
968 | assert(m); |
969 | ||
9444b1f2 LP |
970 | /* We can't really delete the group, since we are in it. But |
971 | * let's trim it. */ | |
972 | if (delete && m->cgroup_root) | |
973 | cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false); | |
8e274523 | 974 | |
03e334a1 | 975 | m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd); |
c6c18be3 | 976 | |
9444b1f2 LP |
977 | free(m->cgroup_root); |
978 | m->cgroup_root = NULL; | |
8e274523 LP |
979 | } |
980 | ||
4ad49000 | 981 | Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) { |
acb14d31 | 982 | char *p; |
4ad49000 | 983 | Unit *u; |
acb14d31 LP |
984 | |
985 | assert(m); | |
986 | assert(cgroup); | |
acb14d31 | 987 | |
4ad49000 LP |
988 | u = hashmap_get(m->cgroup_unit, cgroup); |
989 | if (u) | |
990 | return u; | |
acb14d31 | 991 | |
8e70580b | 992 | p = strdupa(cgroup); |
acb14d31 LP |
993 | for (;;) { |
994 | char *e; | |
995 | ||
996 | e = strrchr(p, '/'); | |
4ad49000 LP |
997 | if (e == p || !e) |
998 | return NULL; | |
acb14d31 LP |
999 | |
1000 | *e = 0; | |
1001 | ||
4ad49000 LP |
1002 | u = hashmap_get(m->cgroup_unit, p); |
1003 | if (u) | |
1004 | return u; | |
acb14d31 LP |
1005 | } |
1006 | } | |
1007 | ||
4ad49000 LP |
1008 | Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) { |
1009 | _cleanup_free_ char *cgroup = NULL; | |
acb14d31 | 1010 | int r; |
8e274523 | 1011 | |
8c47c732 LP |
1012 | assert(m); |
1013 | ||
1014 | if (pid <= 1) | |
1015 | return NULL; | |
1016 | ||
4ad49000 LP |
1017 | r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); |
1018 | if (r < 0) | |
6dde1f33 LP |
1019 | return NULL; |
1020 | ||
4ad49000 | 1021 | return manager_get_unit_by_cgroup(m, cgroup); |
6dde1f33 | 1022 | } |
4fbf50b3 | 1023 | |
4ad49000 LP |
1024 | int manager_notify_cgroup_empty(Manager *m, const char *cgroup) { |
1025 | Unit *u; | |
1026 | int r; | |
4fbf50b3 | 1027 | |
4ad49000 LP |
1028 | assert(m); |
1029 | assert(cgroup); | |
4fbf50b3 | 1030 | |
4ad49000 | 1031 | u = manager_get_unit_by_cgroup(m, cgroup); |
5ad096b3 LP |
1032 | if (!u) |
1033 | return 0; | |
b56c28c3 | 1034 | |
5ad096b3 LP |
1035 | r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true); |
1036 | if (r <= 0) | |
1037 | return r; | |
1038 | ||
1039 | if (UNIT_VTABLE(u)->notify_cgroup_empty) | |
1040 | UNIT_VTABLE(u)->notify_cgroup_empty(u); | |
1041 | ||
1042 | unit_add_to_gc_queue(u); | |
1043 | return 0; | |
1044 | } | |
1045 | ||
1046 | int unit_get_memory_current(Unit *u, uint64_t *ret) { | |
1047 | _cleanup_free_ char *v = NULL; | |
1048 | int r; | |
1049 | ||
1050 | assert(u); | |
1051 | assert(ret); | |
1052 | ||
1053 | if (!u->cgroup_path) | |
1054 | return -ENODATA; | |
1055 | ||
1056 | if ((u->cgroup_realized_mask & CGROUP_MEMORY) == 0) | |
1057 | return -ENODATA; | |
1058 | ||
1059 | r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v); | |
1060 | if (r == -ENOENT) | |
1061 | return -ENODATA; | |
1062 | if (r < 0) | |
1063 | return r; | |
1064 | ||
1065 | return safe_atou64(v, ret); | |
1066 | } | |
1067 | ||
1068 | static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { | |
1069 | _cleanup_free_ char *v = NULL; | |
1070 | uint64_t ns; | |
1071 | int r; | |
1072 | ||
1073 | assert(u); | |
1074 | assert(ret); | |
1075 | ||
1076 | if (!u->cgroup_path) | |
1077 | return -ENODATA; | |
1078 | ||
1079 | if ((u->cgroup_realized_mask & CGROUP_CPUACCT) == 0) | |
1080 | return -ENODATA; | |
1081 | ||
1082 | r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v); | |
1083 | if (r == -ENOENT) | |
1084 | return -ENODATA; | |
1085 | if (r < 0) | |
1086 | return r; | |
1087 | ||
1088 | r = safe_atou64(v, &ns); | |
1089 | if (r < 0) | |
1090 | return r; | |
1091 | ||
1092 | *ret = ns; | |
1093 | return 0; | |
1094 | } | |
1095 | ||
1096 | int unit_get_cpu_usage(Unit *u, nsec_t *ret) { | |
1097 | nsec_t ns; | |
1098 | int r; | |
1099 | ||
1100 | r = unit_get_cpu_usage_raw(u, &ns); | |
1101 | if (r < 0) | |
1102 | return r; | |
1103 | ||
1104 | if (ns > u->cpuacct_usage_base) | |
1105 | ns -= u->cpuacct_usage_base; | |
1106 | else | |
1107 | ns = 0; | |
1108 | ||
1109 | *ret = ns; | |
1110 | return 0; | |
1111 | } | |
1112 | ||
1113 | int unit_reset_cpu_usage(Unit *u) { | |
1114 | nsec_t ns; | |
1115 | int r; | |
1116 | ||
1117 | assert(u); | |
1118 | ||
1119 | r = unit_get_cpu_usage_raw(u, &ns); | |
1120 | if (r < 0) { | |
1121 | u->cpuacct_usage_base = 0; | |
1122 | return r; | |
b56c28c3 | 1123 | } |
2633eb83 | 1124 | |
5ad096b3 | 1125 | u->cpuacct_usage_base = ns; |
4ad49000 | 1126 | return 0; |
4fbf50b3 LP |
1127 | } |
1128 | ||
4ad49000 LP |
1129 | static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = { |
1130 | [CGROUP_AUTO] = "auto", | |
1131 | [CGROUP_CLOSED] = "closed", | |
1132 | [CGROUP_STRICT] = "strict", | |
1133 | }; | |
4fbf50b3 | 1134 | |
4ad49000 | 1135 | DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy); |