return 0;
}
-static int unit_watch_pids_in_path(Unit *u, const char *path) {
- _cleanup_closedir_ DIR *d = NULL;
- _cleanup_fclose_ FILE *f = NULL;
- int ret = 0, r;
-
- assert(u);
- assert(path);
-
- r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f);
- if (r < 0)
- RET_GATHER(ret, r);
- else {
- for (;;) {
- _cleanup_(pidref_done) PidRef pid = PIDREF_NULL;
-
- r = cg_read_pidref(f, &pid, /* flags = */ 0);
- if (r == 0)
- break;
- if (r < 0) {
- RET_GATHER(ret, r);
- break;
- }
-
- RET_GATHER(ret, unit_watch_pidref(u, &pid, /* exclusive= */ false));
- }
- }
-
- r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
- if (r < 0)
- RET_GATHER(ret, r);
- else {
- for (;;) {
- _cleanup_free_ char *fn = NULL, *p = NULL;
-
- r = cg_read_subgroup(d, &fn);
- if (r == 0)
- break;
- if (r < 0) {
- RET_GATHER(ret, r);
- break;
- }
-
- p = path_join(empty_to_root(path), fn);
- if (!p)
- return -ENOMEM;
-
- RET_GATHER(ret, unit_watch_pids_in_path(u, p));
- }
- }
-
- return ret;
-}
-
-int unit_synthesize_cgroup_empty_event(Unit *u) {
- int r;
-
- assert(u);
-
- /* Enqueue a synthetic cgroup empty event if this unit doesn't watch any PIDs anymore. This is compatibility
- * support for non-unified systems where notifications aren't reliable, and hence need to take whatever we can
- * get as notification source as soon as we stopped having any useful PIDs to watch for. */
-
- CGroupRuntime *crt = unit_get_cgroup_runtime(u);
- if (!crt || !crt->cgroup_path)
- return -ENOENT;
-
- r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
- if (r < 0)
- return r;
- if (r > 0) /* On unified we have reliable notifications, and don't need this */
- return 0;
-
- if (!set_isempty(u->pids))
- return 0;
-
- unit_add_to_cgroup_empty_queue(u);
- return 0;
-}
-
-int unit_watch_all_pids(Unit *u) {
- int r;
-
- assert(u);
-
- /* Adds all PIDs from our cgroup to the set of PIDs we
- * watch. This is a fallback logic for cases where we do not
- * get reliable cgroup empty notifications: we try to use
- * SIGCHLD as replacement. */
-
- CGroupRuntime *crt = unit_get_cgroup_runtime(u);
- if (!crt || !crt->cgroup_path)
- return -ENOENT;
-
- r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
- if (r < 0)
- return r;
- if (r > 0) /* On unified we can use proper notifications */
- return 0;
-
- return unit_watch_pids_in_path(u, crt->cgroup_path);
-}
-
static int on_cgroup_empty_event(sd_event_source *s, void *userdata) {
Manager *m = ASSERT_PTR(userdata);
Unit *u;
return 0;
}
-void unit_add_to_cgroup_empty_queue(Unit *u) {
+static void unit_add_to_cgroup_empty_queue(Unit *u) {
int r;
assert(u);
- /* Note that there are four different ways how cgroup empty events reach us:
- *
- * 1. On the unified hierarchy we get an inotify event on the cgroup
- *
- * 2. On the legacy hierarchy, when running in system mode, we get a datagram on the cgroup agent socket
- *
- * 3. On the legacy hierarchy, when running in user mode, we get a D-Bus signal on the system bus
- *
- * 4. On the legacy hierarchy, in service units we start watching all processes of the cgroup for SIGCHLD as
- * soon as we get one SIGCHLD, to deal with unreliable cgroup notifications.
- *
- * Regardless which way we got the notification, we'll verify it here, and then add it to a separate
- * queue. This queue will be dispatched at a lower priority than the SIGCHLD handler, so that we always use
- * SIGCHLD if we can get it first, and only use the cgroup empty notifications if there's no SIGCHLD pending
- * (which might happen if the cgroup doesn't contain processes that are our own child, which is typically the
- * case for scope units). */
+ /* Note that cgroup empty events are dispatched in a separate queue with a lower priority than
+ * the SIGCHLD handler, so that we always use SIGCHLD if we can get it first, and only use
+ * the cgroup empty notifications if there's no SIGCHLD pending (which might happen if the cgroup
+ * doesn't contain processes that are our own child, which is typically the case for scope units). */
if (u->in_cgroup_empty_queue)
return;
- /* Let's verify that the cgroup is really empty */
- r = unit_cgroup_is_empty(u);
- if (r <= 0)
- return;
-
LIST_PREPEND(cgroup_empty_queue, u->manager->cgroup_empty_queue, u);
u->in_cgroup_empty_queue = true;
return NULL;
}
-int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
- Unit *u;
-
- assert(m);
- assert(cgroup);
-
- /* Called on the legacy hierarchy whenever we get an explicit cgroup notification from the cgroup agent process
- * or from the --system instance */
-
- log_debug("Got cgroup empty notification for: %s", cgroup);
-
- u = manager_get_unit_by_cgroup(m, cgroup);
- if (!u)
- return 0;
-
- unit_add_to_cgroup_empty_queue(u);
- return 1;
-}
-
int unit_get_memory_available(Unit *u, uint64_t *ret) {
uint64_t available = UINT64_MAX, current = 0;
int unit_cgroup_is_empty(Unit *u);
void unit_release_cgroup(Unit *u, bool drop_cgroup_runtime);
-void unit_add_to_cgroup_empty_queue(Unit *u);
int unit_check_oomd_kill(Unit *u);
int unit_check_oom(Unit *u);
uint64_t unit_get_ancestor_startup_memory_low(Unit *u);
int unit_search_main_pid(Unit *u, PidRef *ret);
-int unit_watch_all_pids(Unit *u);
-
-int unit_synthesize_cgroup_empty_event(Unit *u);
int unit_get_memory_available(Unit *u, uint64_t *ret);
int unit_get_memory_accounting(Unit *u, CGroupMemoryAccountingMetric metric, uint64_t *ret);
bool unit_has_startup_cgroup_constraints(Unit *u);
-int manager_notify_cgroup_empty(Manager *m, const char *group);
-
void unit_invalidate_cgroup(Unit *u, CGroupMask m);
void unit_invalidate_cgroup_bpf(Unit *u);
u->bus_track = sd_bus_track_unref(u->bus_track); /* make sure we aren't called again */
- /* If the client that tracks us disappeared, then there's reason to believe that the cgroup is empty now too,
- * let's see */
- unit_add_to_cgroup_empty_queue(u);
-
- /* Also add the unit to the GC queue, after all if the client left it might be time to GC this unit */
+ /* Add the unit to the GC queue, after all if the client left it might be time to GC this unit */
unit_add_to_gc_queue(u);
return 0;
EVENT_PRIORITY_TIME_CHANGE = SD_EVENT_PRIORITY_NORMAL-1,
EVENT_PRIORITY_TIME_ZONE = SD_EVENT_PRIORITY_NORMAL-1,
EVENT_PRIORITY_IPC = SD_EVENT_PRIORITY_NORMAL,
- EVENT_PRIORITY_REWATCH_PIDS = SD_EVENT_PRIORITY_IDLE,
- EVENT_PRIORITY_SERVICE_WATCHDOG = SD_EVENT_PRIORITY_IDLE+1,
- EVENT_PRIORITY_RUN_QUEUE = SD_EVENT_PRIORITY_IDLE+2,
+ EVENT_PRIORITY_SERVICE_WATCHDOG = SD_EVENT_PRIORITY_IDLE,
+ EVENT_PRIORITY_RUN_QUEUE = SD_EVENT_PRIORITY_IDLE+1,
/* … to least important */
};
if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL, SCOPE_START_CHOWN, SCOPE_RUNNING))
s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source);
- if (!IN_SET(old_state, SCOPE_DEAD, SCOPE_FAILED) && IN_SET(state, SCOPE_DEAD, SCOPE_FAILED)) {
+ if (!IN_SET(old_state, SCOPE_DEAD, SCOPE_FAILED) && IN_SET(state, SCOPE_DEAD, SCOPE_FAILED))
unit_unwatch_all_pids(UNIT(s));
- unit_dequeue_rewatch_pids(UNIT(s));
- }
if (state != old_state)
log_unit_debug(UNIT(s), "Changed %s -> %s",
if (r < 0)
return r;
- if (!IN_SET(s->deserialized_state, SCOPE_DEAD, SCOPE_FAILED)) {
- if (u->pids) {
- PidRef *pid;
-
- SET_FOREACH(pid, u->pids) {
- r = unit_watch_pidref(u, pid, /* exclusive= */ false);
- if (r < 0 && r != -EEXIST)
- return r;
- }
- } else
- (void) unit_enqueue_rewatch_pids(u);
+ if (!IN_SET(s->deserialized_state, SCOPE_DEAD, SCOPE_FAILED) && u->pids) {
+ PidRef *pid;
+ SET_FOREACH(pid, u->pids) {
+ r = unit_watch_pidref(u, pid, /* exclusive= */ false);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
}
bus_scope_track_controller(s);
if (s->result == SCOPE_SUCCESS)
s->result = f;
- /* Before sending any signal, make sure we track all members of this cgroup */
- (void) unit_watch_all_pids(UNIT(s));
-
- /* Also, enqueue a job that we recheck all our PIDs a bit later, given that it's likely some processes have
- * died now */
- (void) unit_enqueue_rewatch_pids(UNIT(s));
-
/* If we have a controller set let's ask the controller nicely to terminate the scope, instead of us going
* directly into SIGTERM berserk mode */
if (state == SCOPE_STOP_SIGTERM)
/* Set the maximum runtime timeout. */
scope_arm_timer(s, /* relative= */ false, scope_running_timeout(s));
- /* On unified we use proper notifications hence we can unwatch the PIDs
- * we just attached to the scope. This can also be done on legacy as
- * we're going to update the list of the processes we watch with the
- * PIDs currently in the scope anyway. */
+ /* Unwatch all pids we've just added to cgroup. We rely on empty notifications there. */
unit_unwatch_all_pids(u);
- /* Start watching the PIDs currently in the scope (legacy hierarchy only) */
- (void) unit_enqueue_rewatch_pids(u);
return 1;
fail:
scope_enter_running(s);
return;
}
-
- /* If we get a SIGCHLD event for one of the processes we were interested in, then we look for others to
- * watch, under the assumption that we'll sooner or later get a SIGCHLD for them, as the original
- * process we watched was probably the parent of them, and they are hence now our children. */
-
- (void) unit_enqueue_rewatch_pids(u);
}
static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
scope_set_state(s, SCOPE_ABANDONED);
- /* The client is no longer watching the remaining processes, so let's step in here, under the assumption that
- * the remaining processes will be sooner or later reassigned to us as parent. */
- (void) unit_enqueue_rewatch_pids(UNIT(s));
-
return 0;
}
if (IN_SET(state,
SERVICE_DEAD, SERVICE_FAILED,
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED,
- SERVICE_DEAD_RESOURCES_PINNED)) {
+ SERVICE_DEAD_RESOURCES_PINNED))
unit_unwatch_all_pids(u);
- unit_dequeue_rewatch_pids(u);
- }
if (state != SERVICE_START)
s->exec_fd_event_source = sd_event_source_disable_unref(s->exec_fd_event_source);
SERVICE_DEAD, SERVICE_FAILED,
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED,
SERVICE_CLEANING,
- SERVICE_DEAD_RESOURCES_PINNED)) {
- (void) unit_enqueue_rewatch_pids(u);
+ SERVICE_DEAD_RESOURCES_PINNED))
(void) unit_setup_exec_runtime(u);
- }
if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_MOUNTING))
service_start_watchdog(s);
s->result = f;
service_unwatch_control_pid(s);
- (void) unit_enqueue_rewatch_pids(UNIT(s));
s->control_command = s->exec_command[SERVICE_EXEC_STOP_POST];
if (s->control_command) {
if (s->result == SERVICE_SUCCESS)
s->result = f;
- /* Before sending any signal, make sure we track all members of this cgroup */
- (void) unit_watch_all_pids(UNIT(s));
-
- /* Also, enqueue a job that we recheck all our PIDs a bit later, given that it's likely some processes have
- * died now */
- (void) unit_enqueue_rewatch_pids(UNIT(s));
-
kill_operation = state_to_kill_operation(s, state);
r = unit_kill_context(UNIT(s), kill_operation);
if (r < 0) {
assert(s);
- (void) unit_enqueue_rewatch_pids(UNIT(s));
-
r = service_arm_timer(s, /* relative= */ true, s->timeout_stop_usec);
if (r < 0) {
log_unit_warning_errno(UNIT(s), r, "Failed to install timer: %m");
s->result = f;
service_unwatch_control_pid(s);
- (void) unit_enqueue_rewatch_pids(UNIT(s));
s->control_command = s->exec_command[SERVICE_EXEC_STOP];
if (s->control_command) {
/* Notify clients about changed exit status */
if (notify_dbus)
unit_add_to_dbus_queue(u);
-
- /* We watch the main/control process otherwise we can't retrieve the unit they
- * belong to with cgroupv1. But if they are not our direct child, we won't get a
- * SIGCHLD for them. Therefore we need to look for others to watch so we can
- * detect when the cgroup becomes empty. Note that the control process is always
- * our child so it's pointless to watch all other processes. */
- if (!control_pid_good(s))
- if (!s->main_pid_known || s->main_pid_alien || unit_cgroup_delegate(u))
- (void) unit_enqueue_rewatch_pids(u);
}
static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
unit_done(u);
- unit_dequeue_rewatch_pids(u);
-
u->match_bus_slot = sd_bus_slot_unref(u->match_bus_slot);
u->bus_track = sd_bus_track_unref(u->bus_track);
u->deserialized_refs = strv_free(u->deserialized_refs);
pidref_done(pidref);
}
-static void unit_tidy_watch_pids(Unit *u) {
- PidRef *except1, *except2, *e;
-
- assert(u);
-
- /* Cleans dead PIDs from our list */
-
- except1 = unit_main_pid(u);
- except2 = unit_control_pid(u);
-
- SET_FOREACH(e, u->pids) {
- if (pidref_equal(except1, e) || pidref_equal(except2, e))
- continue;
-
- if (pidref_is_unwaited(e) <= 0)
- unit_unwatch_pidref(u, e);
- }
-}
-
-static int on_rewatch_pids_event(sd_event_source *s, void *userdata) {
- Unit *u = ASSERT_PTR(userdata);
-
- assert(s);
-
- unit_tidy_watch_pids(u);
- (void) unit_watch_all_pids(u);
-
- /* If the PID set is empty now, then let's finish this off. */
- unit_synthesize_cgroup_empty_event(u);
-
- return 0;
-}
-
-int unit_enqueue_rewatch_pids(Unit *u) {
- int r;
-
- assert(u);
-
- CGroupRuntime *crt = unit_get_cgroup_runtime(u);
- if (!crt || !crt->cgroup_path)
- return -ENOENT;
-
- r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
- if (r < 0)
- return r;
- if (r > 0) /* On unified we can use proper notifications */
- return 0;
-
- /* Enqueues a low-priority job that will clean up dead PIDs from our list of PIDs to watch and subscribe to new
- * PIDs that might have appeared. We do this in a delayed job because the work might be quite slow, as it
- * involves issuing kill(pid, 0) on all processes we watch. */
-
- if (!u->rewatch_pids_event_source) {
- _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
-
- r = sd_event_add_defer(u->manager->event, &s, on_rewatch_pids_event, u);
- if (r < 0)
- return log_error_errno(r, "Failed to allocate event source for tidying watched PIDs: %m");
-
- r = sd_event_source_set_priority(s, EVENT_PRIORITY_REWATCH_PIDS);
- if (r < 0)
- return log_error_errno(r, "Failed to adjust priority of event source for tidying watched PIDs: %m");
-
- (void) sd_event_source_set_description(s, "tidy-watch-pids");
-
- u->rewatch_pids_event_source = TAKE_PTR(s);
- }
-
- r = sd_event_source_set_enabled(u->rewatch_pids_event_source, SD_EVENT_ONESHOT);
- if (r < 0)
- return log_error_errno(r, "Failed to enable event source for tidying watched PIDs: %m");
-
- return 0;
-}
-
-void unit_dequeue_rewatch_pids(Unit *u) {
- assert(u);
-
- u->rewatch_pids_event_source = sd_event_source_disable_unref(u->rewatch_pids_event_source);
-}
-
bool unit_job_is_applicable(Unit *u, JobType j) {
assert(u);
assert(j >= 0 && j < _JOB_TYPE_MAX);
UnitFileState unit_file_state;
PresetAction unit_file_preset;
- /* Low-priority event source which is used to remove watched PIDs that have gone away, and subscribe to any new
- * ones which might have appeared. */
- sd_event_source *rewatch_pids_event_source;
-
/* How to start OnSuccess=/OnFailure= units */
JobMode on_success_job_mode;
JobMode on_failure_job_mode;
void unit_unwatch_all_pids(Unit *u);
void unit_unwatch_pidref_done(Unit *u, PidRef *pidref);
-int unit_enqueue_rewatch_pids(Unit *u);
-void unit_dequeue_rewatch_pids(Unit *u);
-
int unit_install_bus_match(Unit *u, sd_bus *bus, const char *name);
int unit_watch_bus_name(Unit *u, const char *name);
void unit_unwatch_bus_name(Unit *u, const char *name);