From: Anita Zhang Date: Fri, 23 Oct 2020 05:44:22 +0000 (-0700) Subject: core: clean up inactive/failed {service|scope}'s cgroups when the last process exits X-Git-Tag: v247-rc2~55 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e08dabfec7304dfa0d59997dc4219ffaf22af717;p=thirdparty%2Fsystemd.git core: clean up inactive/failed {service|scope}'s cgroups when the last process exits If processes remain in the unit's cgroup after the final SIGKILL is sent and the unit has exceeded stop timeout, don't release the unit's cgroup information. Pid1 will have failed to `rmdir` the cgroup path due to processes remaining in the cgroup and releasing would leave the cgroup path on the file system with no tracking for pid1 to clean it up. Instead, keep the information around until the last process exits and pid1 sends the cgroup empty notification. The service/scope can then prune the cgroup if the unit is inactive/failed. --- diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 1958c1be2b9..5d312144612 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -2422,6 +2422,29 @@ void unit_release_cgroup(Unit *u) { } } +bool unit_maybe_release_cgroup(Unit *u) { + int r; + + assert(u); + + if (!u->cgroup_path) + return true; + + /* Don't release the cgroup if there are still processes under it. If we get notified later when all the + * processes exit (e.g. the processes were in D-state and exited after the unit was marked as failed) + * we need the cgroup paths to continue to be tracked by the manager so they can be looked up and cleaned + * up later. */ + r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path); + if (r < 0) + log_unit_debug_errno(u, r, "Error checking if the cgroup is recursively empty, ignoring: %m"); + else if (r == 1) { + unit_release_cgroup(u); + return true; + } + + return false; +} + void unit_prune_cgroup(Unit *u) { int r; bool is_root_slice; @@ -2449,7 +2472,8 @@ void unit_prune_cgroup(Unit *u) { if (is_root_slice) return; - unit_release_cgroup(u); + if (!unit_maybe_release_cgroup(u)) /* Returns true if the cgroup was released */ + return; u->cgroup_realized = false; u->cgroup_realized_mask = 0; diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 881b3f3dfe8..4a748f6ddd4 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -223,11 +223,15 @@ int unit_set_cgroup_path(Unit *u, const char *path); int unit_pick_cgroup_path(Unit *u); int unit_realize_cgroup(Unit *u); -void unit_release_cgroup(Unit *u); void unit_prune_cgroup(Unit *u); int unit_watch_cgroup(Unit *u); int unit_watch_cgroup_memory(Unit *u); +void unit_release_cgroup(Unit *u); +/* Releases the cgroup only if it is recursively empty. + * Returns true if the cgroup was released, false otherwise. */ +bool unit_maybe_release_cgroup(Unit *u); + void unit_add_to_cgroup_empty_queue(Unit *u); int unit_check_oomd_kill(Unit *u); int unit_check_oom(Unit *u); diff --git a/src/core/scope.c b/src/core/scope.c index 540c83ba451..13cd2e6c3c2 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -487,6 +487,11 @@ static void scope_notify_cgroup_empty_event(Unit *u) { if (IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL)) scope_enter_dead(s, SCOPE_SUCCESS); + + /* If the cgroup empty notification comes when the unit is not active, we must have failed to clean + * up the cgroup earlier and should do it now. */ + if (IN_SET(s->state, SCOPE_DEAD, SCOPE_FAILED)) + unit_prune_cgroup(u); } static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) { diff --git a/src/core/service.c b/src/core/service.c index 9d834d4069d..af7534546dd 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -3330,6 +3330,13 @@ static void service_notify_cgroup_empty_event(Unit *u) { break; + /* If the cgroup empty notification comes when the unit is not active, we must have failed to clean + * up the cgroup earlier and should do it now. */ + case SERVICE_DEAD: + case SERVICE_FAILED: + unit_prune_cgroup(u); + break; + default: ; }