]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: drop cgroup v1 synthetic empty event logic
authorMike Yuan <me@yhndnzj.com>
Sun, 16 Mar 2025 20:09:25 +0000 (21:09 +0100)
committerMike Yuan <me@yhndnzj.com>
Fri, 11 Apr 2025 21:50:50 +0000 (23:50 +0200)
cgroup v2's empty events are reliable, hence we'd not bother
with validating it again in unit_add_to_cgroup_empty_queue()
either.

src/core/cgroup.c
src/core/cgroup.h
src/core/dbus-unit.c
src/core/manager.h
src/core/scope.c
src/core/service.c
src/core/unit.c
src/core/unit.h

index 918beb47c0741e29a60db80008d369f0a9f88404..3e7bf74b69c42e9a89f66cd829a6667385785980 100644 (file)
@@ -3726,108 +3726,6 @@ int unit_search_main_pid(Unit *u, PidRef *ret) {
         return 0;
 }
 
-static int unit_watch_pids_in_path(Unit *u, const char *path) {
-        _cleanup_closedir_ DIR *d = NULL;
-        _cleanup_fclose_ FILE *f = NULL;
-        int ret = 0, r;
-
-        assert(u);
-        assert(path);
-
-        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f);
-        if (r < 0)
-                RET_GATHER(ret, r);
-        else {
-                for (;;) {
-                        _cleanup_(pidref_done) PidRef pid = PIDREF_NULL;
-
-                        r = cg_read_pidref(f, &pid, /* flags = */ 0);
-                        if (r == 0)
-                                break;
-                        if (r < 0) {
-                                RET_GATHER(ret, r);
-                                break;
-                        }
-
-                        RET_GATHER(ret, unit_watch_pidref(u, &pid, /* exclusive= */ false));
-                }
-        }
-
-        r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
-        if (r < 0)
-                RET_GATHER(ret, r);
-        else {
-                for (;;) {
-                        _cleanup_free_ char *fn = NULL, *p = NULL;
-
-                        r = cg_read_subgroup(d, &fn);
-                        if (r == 0)
-                                break;
-                        if (r < 0) {
-                                RET_GATHER(ret, r);
-                                break;
-                        }
-
-                        p = path_join(empty_to_root(path), fn);
-                        if (!p)
-                                return -ENOMEM;
-
-                        RET_GATHER(ret, unit_watch_pids_in_path(u, p));
-                }
-        }
-
-        return ret;
-}
-
-int unit_synthesize_cgroup_empty_event(Unit *u) {
-        int r;
-
-        assert(u);
-
-        /* Enqueue a synthetic cgroup empty event if this unit doesn't watch any PIDs anymore. This is compatibility
-         * support for non-unified systems where notifications aren't reliable, and hence need to take whatever we can
-         * get as notification source as soon as we stopped having any useful PIDs to watch for. */
-
-        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
-        if (!crt || !crt->cgroup_path)
-                return -ENOENT;
-
-        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
-        if (r < 0)
-                return r;
-        if (r > 0) /* On unified we have reliable notifications, and don't need this */
-                return 0;
-
-        if (!set_isempty(u->pids))
-                return 0;
-
-        unit_add_to_cgroup_empty_queue(u);
-        return 0;
-}
-
-int unit_watch_all_pids(Unit *u) {
-        int r;
-
-        assert(u);
-
-        /* Adds all PIDs from our cgroup to the set of PIDs we
-         * watch. This is a fallback logic for cases where we do not
-         * get reliable cgroup empty notifications: we try to use
-         * SIGCHLD as replacement. */
-
-        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
-        if (!crt || !crt->cgroup_path)
-                return -ENOENT;
-
-        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
-        if (r < 0)
-                return r;
-        if (r > 0) /* On unified we can use proper notifications */
-                return 0;
-
-        return unit_watch_pids_in_path(u, crt->cgroup_path);
-}
-
 static int on_cgroup_empty_event(sd_event_source *s, void *userdata) {
         Manager *m = ASSERT_PTR(userdata);
         Unit *u;
@@ -3864,36 +3762,19 @@ static int on_cgroup_empty_event(sd_event_source *s, void *userdata) {
         return 0;
 }
 
-void unit_add_to_cgroup_empty_queue(Unit *u) {
+static void unit_add_to_cgroup_empty_queue(Unit *u) {
         int r;
 
         assert(u);
 
-        /* Note that there are four different ways how cgroup empty events reach us:
-         *
-         * 1. On the unified hierarchy we get an inotify event on the cgroup
-         *
-         * 2. On the legacy hierarchy, when running in system mode, we get a datagram on the cgroup agent socket
-         *
-         * 3. On the legacy hierarchy, when running in user mode, we get a D-Bus signal on the system bus
-         *
-         * 4. On the legacy hierarchy, in service units we start watching all processes of the cgroup for SIGCHLD as
-         *    soon as we get one SIGCHLD, to deal with unreliable cgroup notifications.
-         *
-         * Regardless which way we got the notification, we'll verify it here, and then add it to a separate
-         * queue. This queue will be dispatched at a lower priority than the SIGCHLD handler, so that we always use
-         * SIGCHLD if we can get it first, and only use the cgroup empty notifications if there's no SIGCHLD pending
-         * (which might happen if the cgroup doesn't contain processes that are our own child, which is typically the
-         * case for scope units). */
+        /* Note that cgroup empty events are dispatched in a separate queue with a lower priority than
+         * the SIGCHLD handler, so that we always use SIGCHLD if we can get it first, and only use
+         * the cgroup empty notifications if there's no SIGCHLD pending (which might happen if the cgroup
+         * doesn't contain processes that are our own child, which is typically the case for scope units). */
 
         if (u->in_cgroup_empty_queue)
                 return;
 
-        /* Let's verify that the cgroup is really empty */
-        r = unit_cgroup_is_empty(u);
-        if (r <= 0)
-                return;
-
         LIST_PREPEND(cgroup_empty_queue, u->manager->cgroup_empty_queue, u);
         u->in_cgroup_empty_queue = true;
 
@@ -4412,25 +4293,6 @@ Unit* manager_get_unit_by_pidref(Manager *m, PidRef *pid) {
         return NULL;
 }
 
-int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
-        Unit *u;
-
-        assert(m);
-        assert(cgroup);
-
-        /* Called on the legacy hierarchy whenever we get an explicit cgroup notification from the cgroup agent process
-         * or from the --system instance */
-
-        log_debug("Got cgroup empty notification for: %s", cgroup);
-
-        u = manager_get_unit_by_cgroup(m, cgroup);
-        if (!u)
-                return 0;
-
-        unit_add_to_cgroup_empty_queue(u);
-        return 1;
-}
-
 int unit_get_memory_available(Unit *u, uint64_t *ret) {
         uint64_t available = UINT64_MAX, current = 0;
 
index 7b3fbc3b1504c2305755eca12c09c62aa55dac88..66eb90f856cea18a3e0b29383b2538ee4ad08fe3 100644 (file)
@@ -452,7 +452,6 @@ void unit_add_to_cgroup_realize_queue(Unit *u);
 int unit_cgroup_is_empty(Unit *u);
 void unit_release_cgroup(Unit *u, bool drop_cgroup_runtime);
 
-void unit_add_to_cgroup_empty_queue(Unit *u);
 int unit_check_oomd_kill(Unit *u);
 int unit_check_oom(Unit *u);
 
@@ -474,9 +473,6 @@ uint64_t unit_get_ancestor_memory_low(Unit *u);
 uint64_t unit_get_ancestor_startup_memory_low(Unit *u);
 
 int unit_search_main_pid(Unit *u, PidRef *ret);
-int unit_watch_all_pids(Unit *u);
-
-int unit_synthesize_cgroup_empty_event(Unit *u);
 
 int unit_get_memory_available(Unit *u, uint64_t *ret);
 int unit_get_memory_accounting(Unit *u, CGroupMemoryAccountingMetric metric, uint64_t *ret);
@@ -499,8 +495,6 @@ bool unit_has_host_root_cgroup(const Unit *u);
 
 bool unit_has_startup_cgroup_constraints(Unit *u);
 
-int manager_notify_cgroup_empty(Manager *m, const char *group);
-
 void unit_invalidate_cgroup(Unit *u, CGroupMask m);
 void unit_invalidate_cgroup_bpf(Unit *u);
 
index e9a2a4a4c092898bab70b1293d6887d05bff7ab2..3caa4876dd8ecb9765c7fc50d5a44339cffa61c9 100644 (file)
@@ -2644,11 +2644,7 @@ static int bus_unit_track_handler(sd_bus_track *t, void *userdata) {
 
         u->bus_track = sd_bus_track_unref(u->bus_track); /* make sure we aren't called again */
 
-        /* If the client that tracks us disappeared, then there's reason to believe that the cgroup is empty now too,
-         * let's see */
-        unit_add_to_cgroup_empty_queue(u);
-
-        /* Also add the unit to the GC queue, after all if the client left it might be time to GC this unit */
+        /* Add the unit to the GC queue, after all if the client left it might be time to GC this unit */
         unit_add_to_gc_queue(u);
 
         return 0;
index afc7d2077443c6f75156107cc84d4cfcdd48cc99..6fef909350d6ddee706efc249dd49ac5a115fe60 100644 (file)
@@ -696,8 +696,7 @@ enum {
         EVENT_PRIORITY_TIME_CHANGE       = SD_EVENT_PRIORITY_NORMAL-1,
         EVENT_PRIORITY_TIME_ZONE         = SD_EVENT_PRIORITY_NORMAL-1,
         EVENT_PRIORITY_IPC               = SD_EVENT_PRIORITY_NORMAL,
-        EVENT_PRIORITY_REWATCH_PIDS      = SD_EVENT_PRIORITY_IDLE,
-        EVENT_PRIORITY_SERVICE_WATCHDOG  = SD_EVENT_PRIORITY_IDLE+1,
-        EVENT_PRIORITY_RUN_QUEUE         = SD_EVENT_PRIORITY_IDLE+2,
+        EVENT_PRIORITY_SERVICE_WATCHDOG  = SD_EVENT_PRIORITY_IDLE,
+        EVENT_PRIORITY_RUN_QUEUE         = SD_EVENT_PRIORITY_IDLE+1,
         /* … to least important */
 };
index 6e66b56dcba28ca537a292eca34655991a072010..9e14b78da2772e898674405c7de12d5c7f523985 100644 (file)
@@ -93,10 +93,8 @@ static void scope_set_state(Scope *s, ScopeState state) {
         if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL, SCOPE_START_CHOWN, SCOPE_RUNNING))
                 s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source);
 
-        if (!IN_SET(old_state, SCOPE_DEAD, SCOPE_FAILED) && IN_SET(state, SCOPE_DEAD, SCOPE_FAILED)) {
+        if (!IN_SET(old_state, SCOPE_DEAD, SCOPE_FAILED) && IN_SET(state, SCOPE_DEAD, SCOPE_FAILED))
                 unit_unwatch_all_pids(UNIT(s));
-                unit_dequeue_rewatch_pids(UNIT(s));
-        }
 
         if (state != old_state)
                 log_unit_debug(UNIT(s), "Changed %s -> %s",
@@ -237,17 +235,13 @@ static int scope_coldplug(Unit *u) {
         if (r < 0)
                 return r;
 
-        if (!IN_SET(s->deserialized_state, SCOPE_DEAD, SCOPE_FAILED)) {
-                if (u->pids) {
-                        PidRef *pid;
-
-                        SET_FOREACH(pid, u->pids) {
-                                r = unit_watch_pidref(u, pid, /* exclusive= */ false);
-                                if (r < 0 && r != -EEXIST)
-                                        return r;
-                        }
-                } else
-                        (void) unit_enqueue_rewatch_pids(u);
+        if (!IN_SET(s->deserialized_state, SCOPE_DEAD, SCOPE_FAILED) && u->pids) {
+                PidRef *pid;
+                SET_FOREACH(pid, u->pids) {
+                        r = unit_watch_pidref(u, pid, /* exclusive= */ false);
+                        if (r < 0 && r != -EEXIST)
+                                return r;
+                }
         }
 
         bus_scope_track_controller(s);
@@ -297,13 +291,6 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
         if (s->result == SCOPE_SUCCESS)
                 s->result = f;
 
-        /* Before sending any signal, make sure we track all members of this cgroup */
-        (void) unit_watch_all_pids(UNIT(s));
-
-        /* Also, enqueue a job that we recheck all our PIDs a bit later, given that it's likely some processes have
-         * died now */
-        (void) unit_enqueue_rewatch_pids(UNIT(s));
-
         /* If we have a controller set let's ask the controller nicely to terminate the scope, instead of us going
          * directly into SIGTERM berserk mode */
         if (state == SCOPE_STOP_SIGTERM)
@@ -435,14 +422,9 @@ static int scope_enter_running(Scope *s) {
         /* Set the maximum runtime timeout. */
         scope_arm_timer(s, /* relative= */ false, scope_running_timeout(s));
 
-        /* On unified we use proper notifications hence we can unwatch the PIDs
-         * we just attached to the scope. This can also be done on legacy as
-         * we're going to update the list of the processes we watch with the
-         * PIDs currently in the scope anyway. */
+        /* Unwatch all pids we've just added to cgroup. We rely on empty notifications there. */
         unit_unwatch_all_pids(u);
 
-        /* Start watching the PIDs currently in the scope (legacy hierarchy only) */
-        (void) unit_enqueue_rewatch_pids(u);
         return 1;
 
 fail:
@@ -635,12 +617,6 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
                         scope_enter_running(s);
                 return;
         }
-
-        /* If we get a SIGCHLD event for one of the processes we were interested in, then we look for others to
-         * watch, under the assumption that we'll sooner or later get a SIGCHLD for them, as the original
-         * process we watched was probably the parent of them, and they are hence now our children. */
-
-        (void) unit_enqueue_rewatch_pids(u);
 }
 
 static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
@@ -699,10 +675,6 @@ int scope_abandon(Scope *s) {
 
         scope_set_state(s, SCOPE_ABANDONED);
 
-        /* The client is no longer watching the remaining processes, so let's step in here, under the assumption that
-         * the remaining processes will be sooner or later reassigned to us as parent. */
-        (void) unit_enqueue_rewatch_pids(UNIT(s));
-
         return 0;
 }
 
index f158c43615b5e7ca444ca75b9d02f7b78636cb2d..c64fa06335e05afcaf27aced73e07e8d6e142af6 100644 (file)
@@ -1310,10 +1310,8 @@ static void service_set_state(Service *s, ServiceState state) {
         if (IN_SET(state,
                    SERVICE_DEAD, SERVICE_FAILED,
                    SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED,
-                   SERVICE_DEAD_RESOURCES_PINNED)) {
+                   SERVICE_DEAD_RESOURCES_PINNED))
                 unit_unwatch_all_pids(u);
-                unit_dequeue_rewatch_pids(u);
-        }
 
         if (state != SERVICE_START)
                 s->exec_fd_event_source = sd_event_source_disable_unref(s->exec_fd_event_source);
@@ -1426,10 +1424,8 @@ static int service_coldplug(Unit *u) {
                     SERVICE_DEAD, SERVICE_FAILED,
                     SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED,
                     SERVICE_CLEANING,
-                    SERVICE_DEAD_RESOURCES_PINNED)) {
-                (void) unit_enqueue_rewatch_pids(u);
+                    SERVICE_DEAD_RESOURCES_PINNED))
                 (void) unit_setup_exec_runtime(u);
-        }
 
         if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_MOUNTING))
                 service_start_watchdog(s);
@@ -2189,7 +2185,6 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
                 s->result = f;
 
         service_unwatch_control_pid(s);
-        (void) unit_enqueue_rewatch_pids(UNIT(s));
 
         s->control_command = s->exec_command[SERVICE_EXEC_STOP_POST];
         if (s->control_command) {
@@ -2244,13 +2239,6 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
         if (s->result == SERVICE_SUCCESS)
                 s->result = f;
 
-        /* Before sending any signal, make sure we track all members of this cgroup */
-        (void) unit_watch_all_pids(UNIT(s));
-
-        /* Also, enqueue a job that we recheck all our PIDs a bit later, given that it's likely some processes have
-         * died now */
-        (void) unit_enqueue_rewatch_pids(UNIT(s));
-
         kill_operation = state_to_kill_operation(s, state);
         r = unit_kill_context(UNIT(s), kill_operation);
         if (r < 0) {
@@ -2290,8 +2278,6 @@ static void service_enter_stop_by_notify(Service *s) {
 
         assert(s);
 
-        (void) unit_enqueue_rewatch_pids(UNIT(s));
-
         r = service_arm_timer(s, /* relative= */ true, s->timeout_stop_usec);
         if (r < 0) {
                 log_unit_warning_errno(UNIT(s), r, "Failed to install timer: %m");
@@ -2312,7 +2298,6 @@ static void service_enter_stop(Service *s, ServiceResult f) {
                 s->result = f;
 
         service_unwatch_control_pid(s);
-        (void) unit_enqueue_rewatch_pids(UNIT(s));
 
         s->control_command = s->exec_command[SERVICE_EXEC_STOP];
         if (s->control_command) {
@@ -4257,15 +4242,6 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
         /* Notify clients about changed exit status */
         if (notify_dbus)
                 unit_add_to_dbus_queue(u);
-
-        /* We watch the main/control process otherwise we can't retrieve the unit they
-         * belong to with cgroupv1. But if they are not our direct child, we won't get a
-         * SIGCHLD for them. Therefore we need to look for others to watch so we can
-         * detect when the cgroup becomes empty. Note that the control process is always
-         * our child so it's pointless to watch all other processes. */
-        if (!control_pid_good(s))
-                if (!s->main_pid_known || s->main_pid_alien || unit_cgroup_delegate(u))
-                        (void) unit_enqueue_rewatch_pids(u);
 }
 
 static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
index a2eb1f70885e99832dbad567ea87e7ba1c19a31b..b8febd4a218d0891764924c9e8102b0f122f4690 100644 (file)
@@ -754,8 +754,6 @@ Unit* unit_free(Unit *u) {
 
         unit_done(u);
 
-        unit_dequeue_rewatch_pids(u);
-
         u->match_bus_slot = sd_bus_slot_unref(u->match_bus_slot);
         u->bus_track = sd_bus_track_unref(u->bus_track);
         u->deserialized_refs = strv_free(u->deserialized_refs);
@@ -2890,87 +2888,6 @@ void unit_unwatch_pidref_done(Unit *u, PidRef *pidref) {
         pidref_done(pidref);
 }
 
-static void unit_tidy_watch_pids(Unit *u) {
-        PidRef *except1, *except2, *e;
-
-        assert(u);
-
-        /* Cleans dead PIDs from our list */
-
-        except1 = unit_main_pid(u);
-        except2 = unit_control_pid(u);
-
-        SET_FOREACH(e, u->pids) {
-                if (pidref_equal(except1, e) || pidref_equal(except2, e))
-                        continue;
-
-                if (pidref_is_unwaited(e) <= 0)
-                        unit_unwatch_pidref(u, e);
-        }
-}
-
-static int on_rewatch_pids_event(sd_event_source *s, void *userdata) {
-        Unit *u = ASSERT_PTR(userdata);
-
-        assert(s);
-
-        unit_tidy_watch_pids(u);
-        (void) unit_watch_all_pids(u);
-
-        /* If the PID set is empty now, then let's finish this off. */
-        unit_synthesize_cgroup_empty_event(u);
-
-        return 0;
-}
-
-int unit_enqueue_rewatch_pids(Unit *u) {
-        int r;
-
-        assert(u);
-
-        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
-        if (!crt || !crt->cgroup_path)
-                return -ENOENT;
-
-        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
-        if (r < 0)
-                return r;
-        if (r > 0) /* On unified we can use proper notifications */
-                return 0;
-
-        /* Enqueues a low-priority job that will clean up dead PIDs from our list of PIDs to watch and subscribe to new
-         * PIDs that might have appeared. We do this in a delayed job because the work might be quite slow, as it
-         * involves issuing kill(pid, 0) on all processes we watch. */
-
-        if (!u->rewatch_pids_event_source) {
-                _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
-
-                r = sd_event_add_defer(u->manager->event, &s, on_rewatch_pids_event, u);
-                if (r < 0)
-                        return log_error_errno(r, "Failed to allocate event source for tidying watched PIDs: %m");
-
-                r = sd_event_source_set_priority(s, EVENT_PRIORITY_REWATCH_PIDS);
-                if (r < 0)
-                        return log_error_errno(r, "Failed to adjust priority of event source for tidying watched PIDs: %m");
-
-                (void) sd_event_source_set_description(s, "tidy-watch-pids");
-
-                u->rewatch_pids_event_source = TAKE_PTR(s);
-        }
-
-        r = sd_event_source_set_enabled(u->rewatch_pids_event_source, SD_EVENT_ONESHOT);
-        if (r < 0)
-                return log_error_errno(r, "Failed to enable event source for tidying watched PIDs: %m");
-
-        return 0;
-}
-
-void unit_dequeue_rewatch_pids(Unit *u) {
-        assert(u);
-
-        u->rewatch_pids_event_source = sd_event_source_disable_unref(u->rewatch_pids_event_source);
-}
-
 bool unit_job_is_applicable(Unit *u, JobType j) {
         assert(u);
         assert(j >= 0 && j < _JOB_TYPE_MAX);
index 822c39e5d119eae8f5d61930d9a9a6c0b86a3e28..384641f6a770e207ce3973678fbfcda7f897b234 100644 (file)
@@ -373,10 +373,6 @@ typedef struct Unit {
         UnitFileState unit_file_state;
         PresetAction unit_file_preset;
 
-        /* Low-priority event source which is used to remove watched PIDs that have gone away, and subscribe to any new
-         * ones which might have appeared. */
-        sd_event_source *rewatch_pids_event_source;
-
         /* How to start OnSuccess=/OnFailure= units */
         JobMode on_success_job_mode;
         JobMode on_failure_job_mode;
@@ -880,9 +876,6 @@ void unit_unwatch_pidref(Unit *u, const PidRef *pid);
 void unit_unwatch_all_pids(Unit *u);
 void unit_unwatch_pidref_done(Unit *u, PidRef *pidref);
 
-int unit_enqueue_rewatch_pids(Unit *u);
-void unit_dequeue_rewatch_pids(Unit *u);
-
 int unit_install_bus_match(Unit *u, sd_bus *bus, const char *name);
 int unit_watch_bus_name(Unit *u, const char *name);
 void unit_unwatch_bus_name(Unit *u, const char *name);