]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/core/cgroup.c
nspawn: introduce the new /machine/ tree in the cgroup tree and move containers there
[thirdparty/systemd.git] / src / core / cgroup.c
CommitLineData
d6c9574f 1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
8e274523
LP
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
8e274523
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
8e274523 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
8e274523
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <errno.h>
23#include <assert.h>
24#include <unistd.h>
25#include <sys/types.h>
26#include <signal.h>
27#include <sys/mount.h>
c6c18be3 28#include <fcntl.h>
8c6db833 29
8e274523 30#include "cgroup.h"
8c6db833 31#include "cgroup-util.h"
8e274523 32#include "log.h"
9156e799 33#include "strv.h"
9eb977db 34#include "path-util.h"
8e274523 35
8e274523
LP
36int cgroup_bonding_realize(CGroupBonding *b) {
37 int r;
38
39 assert(b);
40 assert(b->path);
41 assert(b->controller);
42
a32360f1 43 r = cg_create(b->controller, b->path, NULL);
ab1f0633
LP
44 if (r < 0) {
45 log_warning("Failed to create cgroup %s:%s: %s", b->controller, b->path, strerror(-r));
8c6db833 46 return r;
ab1f0633 47 }
8e274523 48
8c6db833 49 b->realized = true;
8e274523 50
8e274523 51 return 0;
8e274523
LP
52}
53
54int cgroup_bonding_realize_list(CGroupBonding *first) {
55 CGroupBonding *b;
8c6db833 56 int r;
8e274523 57
8c6db833 58 LIST_FOREACH(by_unit, b, first)
d686d8a9 59 if ((r = cgroup_bonding_realize(b)) < 0 && b->essential)
8e274523 60 return r;
8e274523
LP
61
62 return 0;
63}
64
8d53b453 65void cgroup_bonding_free(CGroupBonding *b, bool trim) {
8e274523
LP
66 assert(b);
67
68 if (b->unit) {
69 CGroupBonding *f;
70
ac155bb8 71 LIST_REMOVE(CGroupBonding, by_unit, b->unit->cgroup_bondings, b);
8e274523 72
d686d8a9 73 if (streq(b->controller, SYSTEMD_CGROUP_CONTROLLER)) {
ac155bb8 74 assert_se(f = hashmap_get(b->unit->manager->cgroup_bondings, b->path));
d686d8a9 75 LIST_REMOVE(CGroupBonding, by_path, f, b);
8e274523 76
d686d8a9 77 if (f)
ac155bb8 78 hashmap_replace(b->unit->manager->cgroup_bondings, b->path, f);
d686d8a9 79 else
ac155bb8 80 hashmap_remove(b->unit->manager->cgroup_bondings, b->path);
d686d8a9 81 }
8e274523
LP
82 }
83
8d53b453
LP
84 if (b->realized && b->ours && trim)
85 cg_trim(b->controller, b->path, false);
8e274523 86
c9106f61
LP
87 free(b->controller);
88 free(b->path);
8e274523
LP
89 free(b);
90}
91
38c52d46 92void cgroup_bonding_free_list(CGroupBonding *first, bool remove_or_trim) {
8e274523
LP
93 CGroupBonding *b, *n;
94
95 LIST_FOREACH_SAFE(by_unit, b, n, first)
38c52d46 96 cgroup_bonding_free(b, remove_or_trim);
8e274523
LP
97}
98
fb385181
LP
99void cgroup_bonding_trim(CGroupBonding *b, bool delete_root) {
100 assert(b);
101
d686d8a9 102 if (b->realized && b->ours)
fb385181
LP
103 cg_trim(b->controller, b->path, delete_root);
104}
105
106void cgroup_bonding_trim_list(CGroupBonding *first, bool delete_root) {
107 CGroupBonding *b;
108
109 LIST_FOREACH(by_unit, b, first)
110 cgroup_bonding_trim(b, delete_root);
111}
112
ecedd90f 113int cgroup_bonding_install(CGroupBonding *b, pid_t pid, const char *cgroup_suffix) {
974efc46 114 _cleanup_free_ char *p = NULL;
ecedd90f 115 const char *path;
8e274523
LP
116 int r;
117
118 assert(b);
119 assert(pid >= 0);
120
ecedd90f 121 if (cgroup_suffix) {
b7def684 122 p = strjoin(b->path, "/", cgroup_suffix, NULL);
ecedd90f
LP
123 if (!p)
124 return -ENOMEM;
125
126 path = p;
127 } else
128 path = b->path;
129
130 r = cg_create_and_attach(b->controller, path, pid);
ecedd90f 131 if (r < 0)
8c6db833 132 return r;
8e274523 133
8c6db833 134 b->realized = true;
8e274523
LP
135 return 0;
136}
137
ecedd90f 138int cgroup_bonding_install_list(CGroupBonding *first, pid_t pid, const char *cgroup_suffix) {
8e274523 139 CGroupBonding *b;
8c6db833 140 int r;
8e274523 141
ecedd90f
LP
142 LIST_FOREACH(by_unit, b, first) {
143 r = cgroup_bonding_install(b, pid, cgroup_suffix);
144 if (r < 0 && b->essential)
8e274523 145 return r;
ecedd90f 146 }
8e274523
LP
147
148 return 0;
149}
150
246aa6dd
LP
151int cgroup_bonding_migrate(CGroupBonding *b, CGroupBonding *list) {
152 CGroupBonding *q;
153 int ret = 0;
154
155 LIST_FOREACH(by_unit, q, list) {
156 int r;
157
158 if (q == b)
159 continue;
160
161 if (!q->ours)
162 continue;
163
164 r = cg_migrate_recursive(q->controller, q->path, b->controller, b->path, true, false);
165 if (r < 0 && ret == 0)
166 ret = r;
167 }
168
169 return ret;
170}
171
172int cgroup_bonding_migrate_to(CGroupBonding *b, const char *target, bool rem) {
173 assert(b);
174 assert(target);
175
176 return cg_migrate_recursive(b->controller, b->path, b->controller, target, true, rem);
177}
178
64747e2d
LP
179int cgroup_bonding_set_group_access(CGroupBonding *b, mode_t mode, uid_t uid, gid_t gid) {
180 assert(b);
181
182 if (!b->realized)
183 return -EINVAL;
184
185 return cg_set_group_access(b->controller, b->path, mode, uid, gid);
186}
187
188int cgroup_bonding_set_group_access_list(CGroupBonding *first, mode_t mode, uid_t uid, gid_t gid) {
189 CGroupBonding *b;
190 int r;
191
192 LIST_FOREACH(by_unit, b, first) {
193 r = cgroup_bonding_set_group_access(b, mode, uid, gid);
194 if (r < 0)
195 return r;
196 }
197
198 return 0;
199}
200
8d53b453 201int cgroup_bonding_set_task_access(CGroupBonding *b, mode_t mode, uid_t uid, gid_t gid, int sticky) {
64747e2d
LP
202 assert(b);
203
204 if (!b->realized)
205 return -EINVAL;
206
8d53b453 207 return cg_set_task_access(b->controller, b->path, mode, uid, gid, sticky);
64747e2d
LP
208}
209
8d53b453 210int cgroup_bonding_set_task_access_list(CGroupBonding *first, mode_t mode, uid_t uid, gid_t gid, int sticky) {
64747e2d
LP
211 CGroupBonding *b;
212 int r;
213
214 LIST_FOREACH(by_unit, b, first) {
8d53b453 215 r = cgroup_bonding_set_task_access(b, mode, uid, gid, sticky);
64747e2d
LP
216 if (r < 0)
217 return r;
218 }
219
220 return 0;
221}
222
88f3e0c9 223int cgroup_bonding_kill(CGroupBonding *b, int sig, bool sigcont, bool rem, Set *s, const char *cgroup_suffix) {
ecedd90f
LP
224 char *p = NULL;
225 const char *path;
226 int r;
227
8e274523 228 assert(b);
8c6db833 229 assert(sig >= 0);
8e274523 230
d686d8a9 231 /* Don't kill cgroups that aren't ours */
31e54cc8 232 if (!b->ours)
d686d8a9 233 return 0;
8c6db833 234
ecedd90f 235 if (cgroup_suffix) {
b7def684 236 p = strjoin(b->path, "/", cgroup_suffix, NULL);
ecedd90f
LP
237 if (!p)
238 return -ENOMEM;
239
240 path = p;
241 } else
242 path = b->path;
243
88f3e0c9 244 r = cg_kill_recursive(b->controller, path, sig, sigcont, true, rem, s);
ecedd90f
LP
245 free(p);
246
247 return r;
8e274523
LP
248}
249
88f3e0c9 250int cgroup_bonding_kill_list(CGroupBonding *first, int sig, bool sigcont, bool rem, Set *s, const char *cgroup_suffix) {
8e274523 251 CGroupBonding *b;
ca949c9d
LP
252 Set *allocated_set = NULL;
253 int ret = -EAGAIN, r;
254
8f53a7b8
LP
255 if (!first)
256 return 0;
257
ca949c9d
LP
258 if (!s)
259 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
260 return -ENOMEM;
8e274523
LP
261
262 LIST_FOREACH(by_unit, b, first) {
88f3e0c9 263 r = cgroup_bonding_kill(b, sig, sigcont, rem, s, cgroup_suffix);
ecedd90f 264 if (r < 0) {
8c6db833 265 if (r == -EAGAIN || r == -ESRCH)
50159e6a 266 continue;
8e274523 267
ca949c9d
LP
268 ret = r;
269 goto finish;
50159e6a
LP
270 }
271
ca949c9d
LP
272 if (ret < 0 || r > 0)
273 ret = r;
8e274523
LP
274 }
275
ca949c9d
LP
276finish:
277 if (allocated_set)
278 set_free(allocated_set);
279
280 return ret;
8e274523
LP
281}
282
283/* Returns 1 if the group is empty, 0 if it is not, -EAGAIN if we
284 * cannot know */
285int cgroup_bonding_is_empty(CGroupBonding *b) {
8e274523
LP
286 int r;
287
288 assert(b);
289
8c6db833
LP
290 if ((r = cg_is_empty_recursive(b->controller, b->path, true)) < 0)
291 return r;
8e274523 292
8c6db833
LP
293 /* If it is empty it is empty */
294 if (r > 0)
8e274523
LP
295 return 1;
296
8c6db833 297 /* It's not only us using this cgroup, so we just don't know */
d686d8a9 298 return b->ours ? 0 : -EAGAIN;
8e274523
LP
299}
300
301int cgroup_bonding_is_empty_list(CGroupBonding *first) {
302 CGroupBonding *b;
303
304 LIST_FOREACH(by_unit, b, first) {
305 int r;
306
307 if ((r = cgroup_bonding_is_empty(b)) < 0) {
308 /* If this returned -EAGAIN, then we don't know if the
309 * group is empty, so let's see if another group can
310 * tell us */
311
312 if (r != -EAGAIN)
313 return r;
314 } else
315 return r;
316 }
317
318 return -EAGAIN;
319}
320
8e274523 321int manager_setup_cgroup(Manager *m) {
a32360f1 322 _cleanup_free_ char *current = NULL, *path = NULL;
7027ff61
LP
323 char suffix_buffer[sizeof("/systemd-") + DECIMAL_STR_MAX(pid_t)];
324 const char *suffix;
8e274523 325 int r;
8e274523
LP
326
327 assert(m);
328
e5a53dc7 329 /* 0. Be nice to Ingo Molnar #628004 */
0c85a4f3 330 if (path_is_mount_point("/sys/fs/cgroup/systemd", false) <= 0) {
e5a53dc7
LP
331 log_warning("No control group support available, not creating root group.");
332 return 0;
333 }
334
35d2e7ec 335 /* 1. Determine hierarchy */
7027ff61 336 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &current);
9156e799 337 if (r < 0) {
12235040 338 log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
a32360f1 339 return r;
12235040 340 }
8e274523 341
67445f4e 342 if (m->running_as == SYSTEMD_SYSTEM)
7027ff61 343 suffix = "/system";
0baf24dd 344 else {
7027ff61
LP
345 sprintf(suffix_buffer, "/systemd-%lu", (unsigned long) getpid());
346 suffix = suffix_buffer;
0baf24dd 347 }
7ccfb64a 348
8e274523 349 free(m->cgroup_hierarchy);
c6c18be3 350 if (endswith(current, suffix)) {
7ccfb64a 351 /* We probably got reexecuted and can continue to use our root cgroup */
c6c18be3
LP
352 m->cgroup_hierarchy = current;
353 current = NULL;
c6c18be3
LP
354 } else {
355 /* We need a new root cgroup */
7027ff61
LP
356 if (streq(current, "/"))
357 m->cgroup_hierarchy = strdup(suffix);
358 else
359 m->cgroup_hierarchy = strappend(current, suffix);
360
361 if (!m->cgroup_hierarchy)
a32360f1 362 return log_oom();
8e274523
LP
363 }
364
35d2e7ec 365 /* 2. Show data */
3474ae3c
LP
366 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_hierarchy, NULL, &path);
367 if (r < 0) {
12235040 368 log_error("Cannot find cgroup mount point: %s", strerror(-r));
a32360f1 369 return r;
12235040 370 }
8e274523 371
c6c18be3
LP
372 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
373
35d2e7ec 374 /* 3. Install agent */
a32360f1
LP
375 if (m->running_as == SYSTEMD_SYSTEM) {
376 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
377 if (r < 0)
378 log_warning("Failed to install release agent, ignoring: %s", strerror(-r));
379 else if (r > 0)
380 log_debug("Installed release agent.");
381 else
382 log_debug("Release agent already installed.");
383 }
8e274523 384
35d2e7ec 385 /* 4. Realize the group */
9156e799
LP
386 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_hierarchy, 0);
387 if (r < 0) {
8e274523 388 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r));
a32360f1 389 return r;
c6c18be3
LP
390 }
391
35d2e7ec 392 /* 5. And pin it, so that it cannot be unmounted */
c6c18be3
LP
393 if (m->pin_cgroupfs_fd >= 0)
394 close_nointr_nofail(m->pin_cgroupfs_fd);
395
9156e799
LP
396 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
397 if (r < 0) {
12235040 398 log_error("Failed to open pin file: %m");
a32360f1 399 return -errno;
c6c18be3
LP
400 }
401
a32360f1 402 /* 6. Remove non-existing controllers from the default controllers list */
b59e2465 403 cg_shorten_controllers(m->default_controllers);
9156e799 404
a32360f1
LP
405 /* 7. Let's create the user and machine hierarchies
406 * right-away, so that people can inotify on them, if they
407 * wish, without this being racy. */
408 if (m->running_as == SYSTEMD_SYSTEM) {
409 cg_create(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_hierarchy, "../user");
410 cg_create(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_hierarchy, "../machine");
411 }
8e274523 412
a32360f1 413 return 0;
8e274523
LP
414}
415
c6c18be3 416void manager_shutdown_cgroup(Manager *m, bool delete) {
8e274523
LP
417 assert(m);
418
c6c18be3
LP
419 if (delete && m->cgroup_hierarchy)
420 cg_delete(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_hierarchy);
8e274523 421
c6c18be3
LP
422 if (m->pin_cgroupfs_fd >= 0) {
423 close_nointr_nofail(m->pin_cgroupfs_fd);
424 m->pin_cgroupfs_fd = -1;
425 }
426
427 free(m->cgroup_hierarchy);
428 m->cgroup_hierarchy = NULL;
8e274523
LP
429}
430
acb14d31
LP
431int cgroup_bonding_get(Manager *m, const char *cgroup, CGroupBonding **bonding) {
432 CGroupBonding *b;
433 char *p;
434
435 assert(m);
436 assert(cgroup);
437 assert(bonding);
438
439 b = hashmap_get(m->cgroup_bondings, cgroup);
5c72face 440 if (b) {
acb14d31
LP
441 *bonding = b;
442 return 1;
443 }
444
8e70580b 445 p = strdupa(cgroup);
acb14d31
LP
446 if (!p)
447 return -ENOMEM;
448
449 for (;;) {
450 char *e;
451
452 e = strrchr(p, '/');
8e70580b 453 if (e == p || !e) {
acb14d31
LP
454 *bonding = NULL;
455 return 0;
456 }
457
458 *e = 0;
459
460 b = hashmap_get(m->cgroup_bondings, p);
461 if (b) {
acb14d31
LP
462 *bonding = b;
463 return 1;
464 }
465 }
466}
467
8e274523
LP
468int cgroup_notify_empty(Manager *m, const char *group) {
469 CGroupBonding *l, *b;
acb14d31 470 int r;
8e274523
LP
471
472 assert(m);
473 assert(group);
474
acb14d31
LP
475 r = cgroup_bonding_get(m, group, &l);
476 if (r <= 0)
477 return r;
8e274523
LP
478
479 LIST_FOREACH(by_path, b, l) {
480 int t;
481
482 if (!b->unit)
483 continue;
484
353fa6a2
LP
485 t = cgroup_bonding_is_empty_list(b);
486 if (t < 0) {
8e274523
LP
487
488 /* If we don't know, we don't know */
489 if (t != -EAGAIN)
490 log_warning("Failed to check whether cgroup is empty: %s", strerror(errno));
491
492 continue;
493 }
494
353fa6a2
LP
495 if (t > 0) {
496 /* If it is empty, let's delete it */
ac155bb8 497 cgroup_bonding_trim_list(b->unit->cgroup_bondings, true);
353fa6a2 498
8e274523
LP
499 if (UNIT_VTABLE(b->unit)->cgroup_notify_empty)
500 UNIT_VTABLE(b->unit)->cgroup_notify_empty(b->unit);
353fa6a2 501 }
8e274523
LP
502 }
503
504 return 0;
505}
506
8c47c732
LP
507Unit* cgroup_unit_by_pid(Manager *m, pid_t pid) {
508 CGroupBonding *l, *b;
509 char *group = NULL;
8c47c732
LP
510
511 assert(m);
512
513 if (pid <= 1)
514 return NULL;
515
7027ff61 516 if (cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &group) < 0)
8c47c732
LP
517 return NULL;
518
519 l = hashmap_get(m->cgroup_bondings, group);
4455bcd0
LP
520
521 if (!l) {
522 char *slash;
523
524 while ((slash = strrchr(group, '/'))) {
525 if (slash == group)
526 break;
527
528 *slash = 0;
529
530 if ((l = hashmap_get(m->cgroup_bondings, group)))
531 break;
532 }
533 }
534
8c47c732
LP
535 free(group);
536
8c47c732
LP
537 LIST_FOREACH(by_path, b, l) {
538
539 if (!b->unit)
540 continue;
541
d686d8a9 542 if (b->ours)
8c47c732
LP
543 return b->unit;
544 }
545
546 return NULL;
547}
548
8e274523
LP
549CGroupBonding *cgroup_bonding_find_list(CGroupBonding *first, const char *controller) {
550 CGroupBonding *b;
551
246aa6dd
LP
552 if (!controller)
553 controller = SYSTEMD_CGROUP_CONTROLLER;
8e274523
LP
554
555 LIST_FOREACH(by_unit, b, first)
556 if (streq(b->controller, controller))
557 return b;
558
559 return NULL;
560}
6dde1f33
LP
561
562char *cgroup_bonding_to_string(CGroupBonding *b) {
563 char *r;
564
565 assert(b);
566
567 if (asprintf(&r, "%s:%s", b->controller, b->path) < 0)
568 return NULL;
569
570 return r;
571}
4fbf50b3
LP
572
573pid_t cgroup_bonding_search_main_pid(CGroupBonding *b) {
574 FILE *f;
2633eb83 575 pid_t pid = 0, npid, mypid;
4fbf50b3
LP
576
577 assert(b);
578
d686d8a9 579 if (!b->ours)
4fbf50b3
LP
580 return 0;
581
bd40a2d8 582 if (cg_enumerate_processes(b->controller, b->path, &f) < 0)
4fbf50b3
LP
583 return 0;
584
2633eb83
LP
585 mypid = getpid();
586
bd40a2d8 587 while (cg_read_pid(f, &npid) > 0) {
2633eb83 588 pid_t ppid;
4fbf50b3
LP
589
590 if (npid == pid)
591 continue;
592
2633eb83
LP
593 /* Ignore processes that aren't our kids */
594 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
595 continue;
596
4fbf50b3 597 if (pid != 0) {
2633eb83
LP
598 /* Dang, there's more than one daemonized PID
599 in this group, so we don't know what process
600 is the main process. */
4fbf50b3
LP
601 pid = 0;
602 break;
603 }
604
605 pid = npid;
606 }
607
608 fclose(f);
609
610 return pid;
611}
612
613pid_t cgroup_bonding_search_main_pid_list(CGroupBonding *first) {
614 CGroupBonding *b;
615 pid_t pid;
616
617 /* Try to find a main pid from this cgroup, but checking if
618 * there's only one PID in the cgroup and returning it. Later
619 * on we might want to add additional, smarter heuristics
620 * here. */
621
622 LIST_FOREACH(by_unit, b, first)
623 if ((pid = cgroup_bonding_search_main_pid(b)) != 0)
624 return pid;
625
626 return 0;
627
628}