]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/core/cgroup.c
nspawn: introduce the new /machine/ tree in the cgroup tree and move containers there
[thirdparty/systemd.git] / src / core / cgroup.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <assert.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <signal.h>
27 #include <sys/mount.h>
28 #include <fcntl.h>
29
30 #include "cgroup.h"
31 #include "cgroup-util.h"
32 #include "log.h"
33 #include "strv.h"
34 #include "path-util.h"
35
36 int cgroup_bonding_realize(CGroupBonding *b) {
37 int r;
38
39 assert(b);
40 assert(b->path);
41 assert(b->controller);
42
43 r = cg_create(b->controller, b->path, NULL);
44 if (r < 0) {
45 log_warning("Failed to create cgroup %s:%s: %s", b->controller, b->path, strerror(-r));
46 return r;
47 }
48
49 b->realized = true;
50
51 return 0;
52 }
53
54 int cgroup_bonding_realize_list(CGroupBonding *first) {
55 CGroupBonding *b;
56 int r;
57
58 LIST_FOREACH(by_unit, b, first)
59 if ((r = cgroup_bonding_realize(b)) < 0 && b->essential)
60 return r;
61
62 return 0;
63 }
64
65 void cgroup_bonding_free(CGroupBonding *b, bool trim) {
66 assert(b);
67
68 if (b->unit) {
69 CGroupBonding *f;
70
71 LIST_REMOVE(CGroupBonding, by_unit, b->unit->cgroup_bondings, b);
72
73 if (streq(b->controller, SYSTEMD_CGROUP_CONTROLLER)) {
74 assert_se(f = hashmap_get(b->unit->manager->cgroup_bondings, b->path));
75 LIST_REMOVE(CGroupBonding, by_path, f, b);
76
77 if (f)
78 hashmap_replace(b->unit->manager->cgroup_bondings, b->path, f);
79 else
80 hashmap_remove(b->unit->manager->cgroup_bondings, b->path);
81 }
82 }
83
84 if (b->realized && b->ours && trim)
85 cg_trim(b->controller, b->path, false);
86
87 free(b->controller);
88 free(b->path);
89 free(b);
90 }
91
92 void cgroup_bonding_free_list(CGroupBonding *first, bool remove_or_trim) {
93 CGroupBonding *b, *n;
94
95 LIST_FOREACH_SAFE(by_unit, b, n, first)
96 cgroup_bonding_free(b, remove_or_trim);
97 }
98
99 void cgroup_bonding_trim(CGroupBonding *b, bool delete_root) {
100 assert(b);
101
102 if (b->realized && b->ours)
103 cg_trim(b->controller, b->path, delete_root);
104 }
105
106 void cgroup_bonding_trim_list(CGroupBonding *first, bool delete_root) {
107 CGroupBonding *b;
108
109 LIST_FOREACH(by_unit, b, first)
110 cgroup_bonding_trim(b, delete_root);
111 }
112
113 int cgroup_bonding_install(CGroupBonding *b, pid_t pid, const char *cgroup_suffix) {
114 _cleanup_free_ char *p = NULL;
115 const char *path;
116 int r;
117
118 assert(b);
119 assert(pid >= 0);
120
121 if (cgroup_suffix) {
122 p = strjoin(b->path, "/", cgroup_suffix, NULL);
123 if (!p)
124 return -ENOMEM;
125
126 path = p;
127 } else
128 path = b->path;
129
130 r = cg_create_and_attach(b->controller, path, pid);
131 if (r < 0)
132 return r;
133
134 b->realized = true;
135 return 0;
136 }
137
138 int cgroup_bonding_install_list(CGroupBonding *first, pid_t pid, const char *cgroup_suffix) {
139 CGroupBonding *b;
140 int r;
141
142 LIST_FOREACH(by_unit, b, first) {
143 r = cgroup_bonding_install(b, pid, cgroup_suffix);
144 if (r < 0 && b->essential)
145 return r;
146 }
147
148 return 0;
149 }
150
151 int cgroup_bonding_migrate(CGroupBonding *b, CGroupBonding *list) {
152 CGroupBonding *q;
153 int ret = 0;
154
155 LIST_FOREACH(by_unit, q, list) {
156 int r;
157
158 if (q == b)
159 continue;
160
161 if (!q->ours)
162 continue;
163
164 r = cg_migrate_recursive(q->controller, q->path, b->controller, b->path, true, false);
165 if (r < 0 && ret == 0)
166 ret = r;
167 }
168
169 return ret;
170 }
171
172 int cgroup_bonding_migrate_to(CGroupBonding *b, const char *target, bool rem) {
173 assert(b);
174 assert(target);
175
176 return cg_migrate_recursive(b->controller, b->path, b->controller, target, true, rem);
177 }
178
179 int cgroup_bonding_set_group_access(CGroupBonding *b, mode_t mode, uid_t uid, gid_t gid) {
180 assert(b);
181
182 if (!b->realized)
183 return -EINVAL;
184
185 return cg_set_group_access(b->controller, b->path, mode, uid, gid);
186 }
187
188 int cgroup_bonding_set_group_access_list(CGroupBonding *first, mode_t mode, uid_t uid, gid_t gid) {
189 CGroupBonding *b;
190 int r;
191
192 LIST_FOREACH(by_unit, b, first) {
193 r = cgroup_bonding_set_group_access(b, mode, uid, gid);
194 if (r < 0)
195 return r;
196 }
197
198 return 0;
199 }
200
201 int cgroup_bonding_set_task_access(CGroupBonding *b, mode_t mode, uid_t uid, gid_t gid, int sticky) {
202 assert(b);
203
204 if (!b->realized)
205 return -EINVAL;
206
207 return cg_set_task_access(b->controller, b->path, mode, uid, gid, sticky);
208 }
209
210 int cgroup_bonding_set_task_access_list(CGroupBonding *first, mode_t mode, uid_t uid, gid_t gid, int sticky) {
211 CGroupBonding *b;
212 int r;
213
214 LIST_FOREACH(by_unit, b, first) {
215 r = cgroup_bonding_set_task_access(b, mode, uid, gid, sticky);
216 if (r < 0)
217 return r;
218 }
219
220 return 0;
221 }
222
223 int cgroup_bonding_kill(CGroupBonding *b, int sig, bool sigcont, bool rem, Set *s, const char *cgroup_suffix) {
224 char *p = NULL;
225 const char *path;
226 int r;
227
228 assert(b);
229 assert(sig >= 0);
230
231 /* Don't kill cgroups that aren't ours */
232 if (!b->ours)
233 return 0;
234
235 if (cgroup_suffix) {
236 p = strjoin(b->path, "/", cgroup_suffix, NULL);
237 if (!p)
238 return -ENOMEM;
239
240 path = p;
241 } else
242 path = b->path;
243
244 r = cg_kill_recursive(b->controller, path, sig, sigcont, true, rem, s);
245 free(p);
246
247 return r;
248 }
249
250 int cgroup_bonding_kill_list(CGroupBonding *first, int sig, bool sigcont, bool rem, Set *s, const char *cgroup_suffix) {
251 CGroupBonding *b;
252 Set *allocated_set = NULL;
253 int ret = -EAGAIN, r;
254
255 if (!first)
256 return 0;
257
258 if (!s)
259 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
260 return -ENOMEM;
261
262 LIST_FOREACH(by_unit, b, first) {
263 r = cgroup_bonding_kill(b, sig, sigcont, rem, s, cgroup_suffix);
264 if (r < 0) {
265 if (r == -EAGAIN || r == -ESRCH)
266 continue;
267
268 ret = r;
269 goto finish;
270 }
271
272 if (ret < 0 || r > 0)
273 ret = r;
274 }
275
276 finish:
277 if (allocated_set)
278 set_free(allocated_set);
279
280 return ret;
281 }
282
283 /* Returns 1 if the group is empty, 0 if it is not, -EAGAIN if we
284 * cannot know */
285 int cgroup_bonding_is_empty(CGroupBonding *b) {
286 int r;
287
288 assert(b);
289
290 if ((r = cg_is_empty_recursive(b->controller, b->path, true)) < 0)
291 return r;
292
293 /* If it is empty it is empty */
294 if (r > 0)
295 return 1;
296
297 /* It's not only us using this cgroup, so we just don't know */
298 return b->ours ? 0 : -EAGAIN;
299 }
300
301 int cgroup_bonding_is_empty_list(CGroupBonding *first) {
302 CGroupBonding *b;
303
304 LIST_FOREACH(by_unit, b, first) {
305 int r;
306
307 if ((r = cgroup_bonding_is_empty(b)) < 0) {
308 /* If this returned -EAGAIN, then we don't know if the
309 * group is empty, so let's see if another group can
310 * tell us */
311
312 if (r != -EAGAIN)
313 return r;
314 } else
315 return r;
316 }
317
318 return -EAGAIN;
319 }
320
321 int manager_setup_cgroup(Manager *m) {
322 _cleanup_free_ char *current = NULL, *path = NULL;
323 char suffix_buffer[sizeof("/systemd-") + DECIMAL_STR_MAX(pid_t)];
324 const char *suffix;
325 int r;
326
327 assert(m);
328
329 /* 0. Be nice to Ingo Molnar #628004 */
330 if (path_is_mount_point("/sys/fs/cgroup/systemd", false) <= 0) {
331 log_warning("No control group support available, not creating root group.");
332 return 0;
333 }
334
335 /* 1. Determine hierarchy */
336 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &current);
337 if (r < 0) {
338 log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
339 return r;
340 }
341
342 if (m->running_as == SYSTEMD_SYSTEM)
343 suffix = "/system";
344 else {
345 sprintf(suffix_buffer, "/systemd-%lu", (unsigned long) getpid());
346 suffix = suffix_buffer;
347 }
348
349 free(m->cgroup_hierarchy);
350 if (endswith(current, suffix)) {
351 /* We probably got reexecuted and can continue to use our root cgroup */
352 m->cgroup_hierarchy = current;
353 current = NULL;
354 } else {
355 /* We need a new root cgroup */
356 if (streq(current, "/"))
357 m->cgroup_hierarchy = strdup(suffix);
358 else
359 m->cgroup_hierarchy = strappend(current, suffix);
360
361 if (!m->cgroup_hierarchy)
362 return log_oom();
363 }
364
365 /* 2. Show data */
366 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_hierarchy, NULL, &path);
367 if (r < 0) {
368 log_error("Cannot find cgroup mount point: %s", strerror(-r));
369 return r;
370 }
371
372 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
373
374 /* 3. Install agent */
375 if (m->running_as == SYSTEMD_SYSTEM) {
376 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
377 if (r < 0)
378 log_warning("Failed to install release agent, ignoring: %s", strerror(-r));
379 else if (r > 0)
380 log_debug("Installed release agent.");
381 else
382 log_debug("Release agent already installed.");
383 }
384
385 /* 4. Realize the group */
386 r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_hierarchy, 0);
387 if (r < 0) {
388 log_error("Failed to create root cgroup hierarchy: %s", strerror(-r));
389 return r;
390 }
391
392 /* 5. And pin it, so that it cannot be unmounted */
393 if (m->pin_cgroupfs_fd >= 0)
394 close_nointr_nofail(m->pin_cgroupfs_fd);
395
396 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
397 if (r < 0) {
398 log_error("Failed to open pin file: %m");
399 return -errno;
400 }
401
402 /* 6. Remove non-existing controllers from the default controllers list */
403 cg_shorten_controllers(m->default_controllers);
404
405 /* 7. Let's create the user and machine hierarchies
406 * right-away, so that people can inotify on them, if they
407 * wish, without this being racy. */
408 if (m->running_as == SYSTEMD_SYSTEM) {
409 cg_create(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_hierarchy, "../user");
410 cg_create(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_hierarchy, "../machine");
411 }
412
413 return 0;
414 }
415
416 void manager_shutdown_cgroup(Manager *m, bool delete) {
417 assert(m);
418
419 if (delete && m->cgroup_hierarchy)
420 cg_delete(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_hierarchy);
421
422 if (m->pin_cgroupfs_fd >= 0) {
423 close_nointr_nofail(m->pin_cgroupfs_fd);
424 m->pin_cgroupfs_fd = -1;
425 }
426
427 free(m->cgroup_hierarchy);
428 m->cgroup_hierarchy = NULL;
429 }
430
431 int cgroup_bonding_get(Manager *m, const char *cgroup, CGroupBonding **bonding) {
432 CGroupBonding *b;
433 char *p;
434
435 assert(m);
436 assert(cgroup);
437 assert(bonding);
438
439 b = hashmap_get(m->cgroup_bondings, cgroup);
440 if (b) {
441 *bonding = b;
442 return 1;
443 }
444
445 p = strdupa(cgroup);
446 if (!p)
447 return -ENOMEM;
448
449 for (;;) {
450 char *e;
451
452 e = strrchr(p, '/');
453 if (e == p || !e) {
454 *bonding = NULL;
455 return 0;
456 }
457
458 *e = 0;
459
460 b = hashmap_get(m->cgroup_bondings, p);
461 if (b) {
462 *bonding = b;
463 return 1;
464 }
465 }
466 }
467
468 int cgroup_notify_empty(Manager *m, const char *group) {
469 CGroupBonding *l, *b;
470 int r;
471
472 assert(m);
473 assert(group);
474
475 r = cgroup_bonding_get(m, group, &l);
476 if (r <= 0)
477 return r;
478
479 LIST_FOREACH(by_path, b, l) {
480 int t;
481
482 if (!b->unit)
483 continue;
484
485 t = cgroup_bonding_is_empty_list(b);
486 if (t < 0) {
487
488 /* If we don't know, we don't know */
489 if (t != -EAGAIN)
490 log_warning("Failed to check whether cgroup is empty: %s", strerror(errno));
491
492 continue;
493 }
494
495 if (t > 0) {
496 /* If it is empty, let's delete it */
497 cgroup_bonding_trim_list(b->unit->cgroup_bondings, true);
498
499 if (UNIT_VTABLE(b->unit)->cgroup_notify_empty)
500 UNIT_VTABLE(b->unit)->cgroup_notify_empty(b->unit);
501 }
502 }
503
504 return 0;
505 }
506
507 Unit* cgroup_unit_by_pid(Manager *m, pid_t pid) {
508 CGroupBonding *l, *b;
509 char *group = NULL;
510
511 assert(m);
512
513 if (pid <= 1)
514 return NULL;
515
516 if (cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &group) < 0)
517 return NULL;
518
519 l = hashmap_get(m->cgroup_bondings, group);
520
521 if (!l) {
522 char *slash;
523
524 while ((slash = strrchr(group, '/'))) {
525 if (slash == group)
526 break;
527
528 *slash = 0;
529
530 if ((l = hashmap_get(m->cgroup_bondings, group)))
531 break;
532 }
533 }
534
535 free(group);
536
537 LIST_FOREACH(by_path, b, l) {
538
539 if (!b->unit)
540 continue;
541
542 if (b->ours)
543 return b->unit;
544 }
545
546 return NULL;
547 }
548
549 CGroupBonding *cgroup_bonding_find_list(CGroupBonding *first, const char *controller) {
550 CGroupBonding *b;
551
552 if (!controller)
553 controller = SYSTEMD_CGROUP_CONTROLLER;
554
555 LIST_FOREACH(by_unit, b, first)
556 if (streq(b->controller, controller))
557 return b;
558
559 return NULL;
560 }
561
562 char *cgroup_bonding_to_string(CGroupBonding *b) {
563 char *r;
564
565 assert(b);
566
567 if (asprintf(&r, "%s:%s", b->controller, b->path) < 0)
568 return NULL;
569
570 return r;
571 }
572
573 pid_t cgroup_bonding_search_main_pid(CGroupBonding *b) {
574 FILE *f;
575 pid_t pid = 0, npid, mypid;
576
577 assert(b);
578
579 if (!b->ours)
580 return 0;
581
582 if (cg_enumerate_processes(b->controller, b->path, &f) < 0)
583 return 0;
584
585 mypid = getpid();
586
587 while (cg_read_pid(f, &npid) > 0) {
588 pid_t ppid;
589
590 if (npid == pid)
591 continue;
592
593 /* Ignore processes that aren't our kids */
594 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
595 continue;
596
597 if (pid != 0) {
598 /* Dang, there's more than one daemonized PID
599 in this group, so we don't know what process
600 is the main process. */
601 pid = 0;
602 break;
603 }
604
605 pid = npid;
606 }
607
608 fclose(f);
609
610 return pid;
611 }
612
613 pid_t cgroup_bonding_search_main_pid_list(CGroupBonding *first) {
614 CGroupBonding *b;
615 pid_t pid;
616
617 /* Try to find a main pid from this cgroup, but checking if
618 * there's only one PID in the cgroup and returning it. Later
619 * on we might want to add additional, smarter heuristics
620 * here. */
621
622 LIST_FOREACH(by_unit, b, first)
623 if ((pid = cgroup_bonding_search_main_pid(b)) != 0)
624 return pid;
625
626 return 0;
627
628 }