From 55e4df21ef5e156316cabd76709b6a1fba503e3c Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Sun, 16 Mar 2025 21:09:25 +0100 Subject: [PATCH] core: drop cgroup v1 synthetic empty event logic cgroup v2's empty events are reliable, hence we'd not bother with validating it again in unit_add_to_cgroup_empty_queue() either. --- src/core/cgroup.c | 148 ++----------------------------------------- src/core/cgroup.h | 6 -- src/core/dbus-unit.c | 6 +- src/core/manager.h | 5 +- src/core/scope.c | 46 +++----------- src/core/service.c | 28 +------- src/core/unit.c | 83 ------------------------ src/core/unit.h | 7 -- 8 files changed, 19 insertions(+), 310 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 918beb47c07..3e7bf74b69c 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -3726,108 +3726,6 @@ int unit_search_main_pid(Unit *u, PidRef *ret) { return 0; } -static int unit_watch_pids_in_path(Unit *u, const char *path) { - _cleanup_closedir_ DIR *d = NULL; - _cleanup_fclose_ FILE *f = NULL; - int ret = 0, r; - - assert(u); - assert(path); - - r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f); - if (r < 0) - RET_GATHER(ret, r); - else { - for (;;) { - _cleanup_(pidref_done) PidRef pid = PIDREF_NULL; - - r = cg_read_pidref(f, &pid, /* flags = */ 0); - if (r == 0) - break; - if (r < 0) { - RET_GATHER(ret, r); - break; - } - - RET_GATHER(ret, unit_watch_pidref(u, &pid, /* exclusive= */ false)); - } - } - - r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d); - if (r < 0) - RET_GATHER(ret, r); - else { - for (;;) { - _cleanup_free_ char *fn = NULL, *p = NULL; - - r = cg_read_subgroup(d, &fn); - if (r == 0) - break; - if (r < 0) { - RET_GATHER(ret, r); - break; - } - - p = path_join(empty_to_root(path), fn); - if (!p) - return -ENOMEM; - - RET_GATHER(ret, unit_watch_pids_in_path(u, p)); - } - } - - return ret; -} - -int unit_synthesize_cgroup_empty_event(Unit *u) { - int r; - - assert(u); - - /* Enqueue a synthetic cgroup empty event if this unit doesn't watch any PIDs anymore. This is compatibility - * support for non-unified systems where notifications aren't reliable, and hence need to take whatever we can - * get as notification source as soon as we stopped having any useful PIDs to watch for. */ - - CGroupRuntime *crt = unit_get_cgroup_runtime(u); - if (!crt || !crt->cgroup_path) - return -ENOENT; - - r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER); - if (r < 0) - return r; - if (r > 0) /* On unified we have reliable notifications, and don't need this */ - return 0; - - if (!set_isempty(u->pids)) - return 0; - - unit_add_to_cgroup_empty_queue(u); - return 0; -} - -int unit_watch_all_pids(Unit *u) { - int r; - - assert(u); - - /* Adds all PIDs from our cgroup to the set of PIDs we - * watch. This is a fallback logic for cases where we do not - * get reliable cgroup empty notifications: we try to use - * SIGCHLD as replacement. */ - - CGroupRuntime *crt = unit_get_cgroup_runtime(u); - if (!crt || !crt->cgroup_path) - return -ENOENT; - - r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER); - if (r < 0) - return r; - if (r > 0) /* On unified we can use proper notifications */ - return 0; - - return unit_watch_pids_in_path(u, crt->cgroup_path); -} - static int on_cgroup_empty_event(sd_event_source *s, void *userdata) { Manager *m = ASSERT_PTR(userdata); Unit *u; @@ -3864,36 +3762,19 @@ static int on_cgroup_empty_event(sd_event_source *s, void *userdata) { return 0; } -void unit_add_to_cgroup_empty_queue(Unit *u) { +static void unit_add_to_cgroup_empty_queue(Unit *u) { int r; assert(u); - /* Note that there are four different ways how cgroup empty events reach us: - * - * 1. On the unified hierarchy we get an inotify event on the cgroup - * - * 2. On the legacy hierarchy, when running in system mode, we get a datagram on the cgroup agent socket - * - * 3. On the legacy hierarchy, when running in user mode, we get a D-Bus signal on the system bus - * - * 4. On the legacy hierarchy, in service units we start watching all processes of the cgroup for SIGCHLD as - * soon as we get one SIGCHLD, to deal with unreliable cgroup notifications. - * - * Regardless which way we got the notification, we'll verify it here, and then add it to a separate - * queue. This queue will be dispatched at a lower priority than the SIGCHLD handler, so that we always use - * SIGCHLD if we can get it first, and only use the cgroup empty notifications if there's no SIGCHLD pending - * (which might happen if the cgroup doesn't contain processes that are our own child, which is typically the - * case for scope units). */ + /* Note that cgroup empty events are dispatched in a separate queue with a lower priority than + * the SIGCHLD handler, so that we always use SIGCHLD if we can get it first, and only use + * the cgroup empty notifications if there's no SIGCHLD pending (which might happen if the cgroup + * doesn't contain processes that are our own child, which is typically the case for scope units). */ if (u->in_cgroup_empty_queue) return; - /* Let's verify that the cgroup is really empty */ - r = unit_cgroup_is_empty(u); - if (r <= 0) - return; - LIST_PREPEND(cgroup_empty_queue, u->manager->cgroup_empty_queue, u); u->in_cgroup_empty_queue = true; @@ -4412,25 +4293,6 @@ Unit* manager_get_unit_by_pidref(Manager *m, PidRef *pid) { return NULL; } -int manager_notify_cgroup_empty(Manager *m, const char *cgroup) { - Unit *u; - - assert(m); - assert(cgroup); - - /* Called on the legacy hierarchy whenever we get an explicit cgroup notification from the cgroup agent process - * or from the --system instance */ - - log_debug("Got cgroup empty notification for: %s", cgroup); - - u = manager_get_unit_by_cgroup(m, cgroup); - if (!u) - return 0; - - unit_add_to_cgroup_empty_queue(u); - return 1; -} - int unit_get_memory_available(Unit *u, uint64_t *ret) { uint64_t available = UINT64_MAX, current = 0; diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 7b3fbc3b150..66eb90f856c 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -452,7 +452,6 @@ void unit_add_to_cgroup_realize_queue(Unit *u); int unit_cgroup_is_empty(Unit *u); void unit_release_cgroup(Unit *u, bool drop_cgroup_runtime); -void unit_add_to_cgroup_empty_queue(Unit *u); int unit_check_oomd_kill(Unit *u); int unit_check_oom(Unit *u); @@ -474,9 +473,6 @@ uint64_t unit_get_ancestor_memory_low(Unit *u); uint64_t unit_get_ancestor_startup_memory_low(Unit *u); int unit_search_main_pid(Unit *u, PidRef *ret); -int unit_watch_all_pids(Unit *u); - -int unit_synthesize_cgroup_empty_event(Unit *u); int unit_get_memory_available(Unit *u, uint64_t *ret); int unit_get_memory_accounting(Unit *u, CGroupMemoryAccountingMetric metric, uint64_t *ret); @@ -499,8 +495,6 @@ bool unit_has_host_root_cgroup(const Unit *u); bool unit_has_startup_cgroup_constraints(Unit *u); -int manager_notify_cgroup_empty(Manager *m, const char *group); - void unit_invalidate_cgroup(Unit *u, CGroupMask m); void unit_invalidate_cgroup_bpf(Unit *u); diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index e9a2a4a4c09..3caa4876dd8 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -2644,11 +2644,7 @@ static int bus_unit_track_handler(sd_bus_track *t, void *userdata) { u->bus_track = sd_bus_track_unref(u->bus_track); /* make sure we aren't called again */ - /* If the client that tracks us disappeared, then there's reason to believe that the cgroup is empty now too, - * let's see */ - unit_add_to_cgroup_empty_queue(u); - - /* Also add the unit to the GC queue, after all if the client left it might be time to GC this unit */ + /* Add the unit to the GC queue, after all if the client left it might be time to GC this unit */ unit_add_to_gc_queue(u); return 0; diff --git a/src/core/manager.h b/src/core/manager.h index afc7d207744..6fef909350d 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -696,8 +696,7 @@ enum { EVENT_PRIORITY_TIME_CHANGE = SD_EVENT_PRIORITY_NORMAL-1, EVENT_PRIORITY_TIME_ZONE = SD_EVENT_PRIORITY_NORMAL-1, EVENT_PRIORITY_IPC = SD_EVENT_PRIORITY_NORMAL, - EVENT_PRIORITY_REWATCH_PIDS = SD_EVENT_PRIORITY_IDLE, - EVENT_PRIORITY_SERVICE_WATCHDOG = SD_EVENT_PRIORITY_IDLE+1, - EVENT_PRIORITY_RUN_QUEUE = SD_EVENT_PRIORITY_IDLE+2, + EVENT_PRIORITY_SERVICE_WATCHDOG = SD_EVENT_PRIORITY_IDLE, + EVENT_PRIORITY_RUN_QUEUE = SD_EVENT_PRIORITY_IDLE+1, /* … to least important */ }; diff --git a/src/core/scope.c b/src/core/scope.c index 6e66b56dcba..9e14b78da27 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -93,10 +93,8 @@ static void scope_set_state(Scope *s, ScopeState state) { if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL, SCOPE_START_CHOWN, SCOPE_RUNNING)) s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source); - if (!IN_SET(old_state, SCOPE_DEAD, SCOPE_FAILED) && IN_SET(state, SCOPE_DEAD, SCOPE_FAILED)) { + if (!IN_SET(old_state, SCOPE_DEAD, SCOPE_FAILED) && IN_SET(state, SCOPE_DEAD, SCOPE_FAILED)) unit_unwatch_all_pids(UNIT(s)); - unit_dequeue_rewatch_pids(UNIT(s)); - } if (state != old_state) log_unit_debug(UNIT(s), "Changed %s -> %s", @@ -237,17 +235,13 @@ static int scope_coldplug(Unit *u) { if (r < 0) return r; - if (!IN_SET(s->deserialized_state, SCOPE_DEAD, SCOPE_FAILED)) { - if (u->pids) { - PidRef *pid; - - SET_FOREACH(pid, u->pids) { - r = unit_watch_pidref(u, pid, /* exclusive= */ false); - if (r < 0 && r != -EEXIST) - return r; - } - } else - (void) unit_enqueue_rewatch_pids(u); + if (!IN_SET(s->deserialized_state, SCOPE_DEAD, SCOPE_FAILED) && u->pids) { + PidRef *pid; + SET_FOREACH(pid, u->pids) { + r = unit_watch_pidref(u, pid, /* exclusive= */ false); + if (r < 0 && r != -EEXIST) + return r; + } } bus_scope_track_controller(s); @@ -297,13 +291,6 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) { if (s->result == SCOPE_SUCCESS) s->result = f; - /* Before sending any signal, make sure we track all members of this cgroup */ - (void) unit_watch_all_pids(UNIT(s)); - - /* Also, enqueue a job that we recheck all our PIDs a bit later, given that it's likely some processes have - * died now */ - (void) unit_enqueue_rewatch_pids(UNIT(s)); - /* If we have a controller set let's ask the controller nicely to terminate the scope, instead of us going * directly into SIGTERM berserk mode */ if (state == SCOPE_STOP_SIGTERM) @@ -435,14 +422,9 @@ static int scope_enter_running(Scope *s) { /* Set the maximum runtime timeout. */ scope_arm_timer(s, /* relative= */ false, scope_running_timeout(s)); - /* On unified we use proper notifications hence we can unwatch the PIDs - * we just attached to the scope. This can also be done on legacy as - * we're going to update the list of the processes we watch with the - * PIDs currently in the scope anyway. */ + /* Unwatch all pids we've just added to cgroup. We rely on empty notifications there. */ unit_unwatch_all_pids(u); - /* Start watching the PIDs currently in the scope (legacy hierarchy only) */ - (void) unit_enqueue_rewatch_pids(u); return 1; fail: @@ -635,12 +617,6 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) { scope_enter_running(s); return; } - - /* If we get a SIGCHLD event for one of the processes we were interested in, then we look for others to - * watch, under the assumption that we'll sooner or later get a SIGCHLD for them, as the original - * process we watched was probably the parent of them, and they are hence now our children. */ - - (void) unit_enqueue_rewatch_pids(u); } static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) { @@ -699,10 +675,6 @@ int scope_abandon(Scope *s) { scope_set_state(s, SCOPE_ABANDONED); - /* The client is no longer watching the remaining processes, so let's step in here, under the assumption that - * the remaining processes will be sooner or later reassigned to us as parent. */ - (void) unit_enqueue_rewatch_pids(UNIT(s)); - return 0; } diff --git a/src/core/service.c b/src/core/service.c index f158c43615b..c64fa06335e 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -1310,10 +1310,8 @@ static void service_set_state(Service *s, ServiceState state) { if (IN_SET(state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED, - SERVICE_DEAD_RESOURCES_PINNED)) { + SERVICE_DEAD_RESOURCES_PINNED)) unit_unwatch_all_pids(u); - unit_dequeue_rewatch_pids(u); - } if (state != SERVICE_START) s->exec_fd_event_source = sd_event_source_disable_unref(s->exec_fd_event_source); @@ -1426,10 +1424,8 @@ static int service_coldplug(Unit *u) { SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED, SERVICE_CLEANING, - SERVICE_DEAD_RESOURCES_PINNED)) { - (void) unit_enqueue_rewatch_pids(u); + SERVICE_DEAD_RESOURCES_PINNED)) (void) unit_setup_exec_runtime(u); - } if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_MOUNTING)) service_start_watchdog(s); @@ -2189,7 +2185,6 @@ static void service_enter_stop_post(Service *s, ServiceResult f) { s->result = f; service_unwatch_control_pid(s); - (void) unit_enqueue_rewatch_pids(UNIT(s)); s->control_command = s->exec_command[SERVICE_EXEC_STOP_POST]; if (s->control_command) { @@ -2244,13 +2239,6 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f if (s->result == SERVICE_SUCCESS) s->result = f; - /* Before sending any signal, make sure we track all members of this cgroup */ - (void) unit_watch_all_pids(UNIT(s)); - - /* Also, enqueue a job that we recheck all our PIDs a bit later, given that it's likely some processes have - * died now */ - (void) unit_enqueue_rewatch_pids(UNIT(s)); - kill_operation = state_to_kill_operation(s, state); r = unit_kill_context(UNIT(s), kill_operation); if (r < 0) { @@ -2290,8 +2278,6 @@ static void service_enter_stop_by_notify(Service *s) { assert(s); - (void) unit_enqueue_rewatch_pids(UNIT(s)); - r = service_arm_timer(s, /* relative= */ true, s->timeout_stop_usec); if (r < 0) { log_unit_warning_errno(UNIT(s), r, "Failed to install timer: %m"); @@ -2312,7 +2298,6 @@ static void service_enter_stop(Service *s, ServiceResult f) { s->result = f; service_unwatch_control_pid(s); - (void) unit_enqueue_rewatch_pids(UNIT(s)); s->control_command = s->exec_command[SERVICE_EXEC_STOP]; if (s->control_command) { @@ -4257,15 +4242,6 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { /* Notify clients about changed exit status */ if (notify_dbus) unit_add_to_dbus_queue(u); - - /* We watch the main/control process otherwise we can't retrieve the unit they - * belong to with cgroupv1. But if they are not our direct child, we won't get a - * SIGCHLD for them. Therefore we need to look for others to watch so we can - * detect when the cgroup becomes empty. Note that the control process is always - * our child so it's pointless to watch all other processes. */ - if (!control_pid_good(s)) - if (!s->main_pid_known || s->main_pid_alien || unit_cgroup_delegate(u)) - (void) unit_enqueue_rewatch_pids(u); } static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) { diff --git a/src/core/unit.c b/src/core/unit.c index a2eb1f70885..b8febd4a218 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -754,8 +754,6 @@ Unit* unit_free(Unit *u) { unit_done(u); - unit_dequeue_rewatch_pids(u); - u->match_bus_slot = sd_bus_slot_unref(u->match_bus_slot); u->bus_track = sd_bus_track_unref(u->bus_track); u->deserialized_refs = strv_free(u->deserialized_refs); @@ -2890,87 +2888,6 @@ void unit_unwatch_pidref_done(Unit *u, PidRef *pidref) { pidref_done(pidref); } -static void unit_tidy_watch_pids(Unit *u) { - PidRef *except1, *except2, *e; - - assert(u); - - /* Cleans dead PIDs from our list */ - - except1 = unit_main_pid(u); - except2 = unit_control_pid(u); - - SET_FOREACH(e, u->pids) { - if (pidref_equal(except1, e) || pidref_equal(except2, e)) - continue; - - if (pidref_is_unwaited(e) <= 0) - unit_unwatch_pidref(u, e); - } -} - -static int on_rewatch_pids_event(sd_event_source *s, void *userdata) { - Unit *u = ASSERT_PTR(userdata); - - assert(s); - - unit_tidy_watch_pids(u); - (void) unit_watch_all_pids(u); - - /* If the PID set is empty now, then let's finish this off. */ - unit_synthesize_cgroup_empty_event(u); - - return 0; -} - -int unit_enqueue_rewatch_pids(Unit *u) { - int r; - - assert(u); - - CGroupRuntime *crt = unit_get_cgroup_runtime(u); - if (!crt || !crt->cgroup_path) - return -ENOENT; - - r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER); - if (r < 0) - return r; - if (r > 0) /* On unified we can use proper notifications */ - return 0; - - /* Enqueues a low-priority job that will clean up dead PIDs from our list of PIDs to watch and subscribe to new - * PIDs that might have appeared. We do this in a delayed job because the work might be quite slow, as it - * involves issuing kill(pid, 0) on all processes we watch. */ - - if (!u->rewatch_pids_event_source) { - _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL; - - r = sd_event_add_defer(u->manager->event, &s, on_rewatch_pids_event, u); - if (r < 0) - return log_error_errno(r, "Failed to allocate event source for tidying watched PIDs: %m"); - - r = sd_event_source_set_priority(s, EVENT_PRIORITY_REWATCH_PIDS); - if (r < 0) - return log_error_errno(r, "Failed to adjust priority of event source for tidying watched PIDs: %m"); - - (void) sd_event_source_set_description(s, "tidy-watch-pids"); - - u->rewatch_pids_event_source = TAKE_PTR(s); - } - - r = sd_event_source_set_enabled(u->rewatch_pids_event_source, SD_EVENT_ONESHOT); - if (r < 0) - return log_error_errno(r, "Failed to enable event source for tidying watched PIDs: %m"); - - return 0; -} - -void unit_dequeue_rewatch_pids(Unit *u) { - assert(u); - - u->rewatch_pids_event_source = sd_event_source_disable_unref(u->rewatch_pids_event_source); -} - bool unit_job_is_applicable(Unit *u, JobType j) { assert(u); assert(j >= 0 && j < _JOB_TYPE_MAX); diff --git a/src/core/unit.h b/src/core/unit.h index 822c39e5d11..384641f6a77 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -373,10 +373,6 @@ typedef struct Unit { UnitFileState unit_file_state; PresetAction unit_file_preset; - /* Low-priority event source which is used to remove watched PIDs that have gone away, and subscribe to any new - * ones which might have appeared. */ - sd_event_source *rewatch_pids_event_source; - /* How to start OnSuccess=/OnFailure= units */ JobMode on_success_job_mode; JobMode on_failure_job_mode; @@ -880,9 +876,6 @@ void unit_unwatch_pidref(Unit *u, const PidRef *pid); void unit_unwatch_all_pids(Unit *u); void unit_unwatch_pidref_done(Unit *u, PidRef *pidref); -int unit_enqueue_rewatch_pids(Unit *u); -void unit_dequeue_rewatch_pids(Unit *u); - int unit_install_bus_match(Unit *u, sd_bus *bus, const char *name); int unit_watch_bus_name(Unit *u, const char *name); void unit_unwatch_bus_name(Unit *u, const char *name); -- 2.47.3