From: Lennart Poettering Date: Wed, 29 Mar 2023 19:52:41 +0000 (+0200) Subject: service: release resources from a seperate queue, not unit_check_gc() X-Git-Tag: v254-rc1~736^2~8 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=6ac62d61db737b01ad3776a7688d8a4c57b3f7d9;p=thirdparty%2Fsystemd.git service: release resources from a seperate queue, not unit_check_gc() The per-unit-type release_resources() hook (most prominent use: to release a service unit's fdstore once a unit is entirely dead and has no jobs more) was currently invoked as part of unit_check_gc(), whose primary purpose is to determine if a unit should be GC'ed. This was always a bit ugly, as release_resources() changes state of the unit, while unit_check_gc() is otherwise (and was before release_resources() was added) a "passive" function that just checks for a couple of conditions. unit_check_gc() is called at various places, including when we wonder if we should add a unit to the gc queue, and then again when we take it out of the gc queue to dtermine whether to really gc it now. The fact that these checks have side effects so far wasn't too problematic, as the state changes (primarily: that services would empty their fdstores) were relatively limited and scope. A later patch in this series is supposed to extend the service state engine with a separate state distinct from SERVICE_DEAD that is very much like it but indicates that the service still has active resources (specifically the fdstore). For cases like that the releasing of the fdstore would result in state changes (as we'd then return to a classic SERVICE_DEAD state). And this is where the fact that the release_resources() is called as side-effect becomes problematic: it would mean that unit state changes would instantly propagate to state changes elsewhere, though we usually want this to be done through the run queue for coalescing and avoidance of recursion. Hence, let's clean this up: let's move the release_resources() logic into a queue of its own, and then enqueue items into it from the general state change notification handle in unit_notify(). --- diff --git a/src/core/manager.c b/src/core/manager.c index 47a502df7fb..b859ce58af3 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1250,6 +1250,26 @@ static unsigned manager_dispatch_cleanup_queue(Manager *m) { return n; } +static unsigned manager_dispatch_release_resources_queue(Manager *m) { + unsigned n = 0; + Unit *u; + + assert(m); + + while ((u = m->release_resources_queue)) { + assert(u->in_release_resources_queue); + + LIST_REMOVE(release_resources_queue, m->release_resources_queue, u); + u->in_release_resources_queue = false; + + n++; + + unit_release_resources(u); + } + + return n; +} + enum { GC_OFFSET_IN_PATH, /* This one is on the path we were traveling */ GC_OFFSET_UNSURE, /* No clue */ @@ -3158,6 +3178,9 @@ int manager_loop(Manager *m) { if (manager_dispatch_stop_when_unneeded_queue(m) > 0) continue; + if (manager_dispatch_release_resources_queue(m) > 0) + continue; + if (manager_dispatch_dbus_queue(m) > 0) continue; diff --git a/src/core/manager.h b/src/core/manager.h index fc95b751201..486eda11b60 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -195,6 +195,9 @@ struct Manager { /* Units that have BindsTo= another unit, and might need to be shutdown because the bound unit is not active. */ LIST_HEAD(Unit, stop_when_bound_queue); + /* Units that have resources open, and where it might be good to check if they can be released now */ + LIST_HEAD(Unit, release_resources_queue); + sd_event *event; /* This maps PIDs we care about to units that are interested in. We allow multiple units to be interested in diff --git a/src/core/unit.c b/src/core/unit.c index 409801aed28..089213cf88e 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -394,37 +394,54 @@ static bool unit_success_failure_handler_has_jobs(Unit *unit) { return false; } +void unit_release_resources(Unit *u) { + UnitActiveState state; + + assert(u); + + if (u->job || u->nop_job) + return; + + if (u->perpetual) + return; + + state = unit_active_state(u); + if (!IN_SET(state, UNIT_INACTIVE, UNIT_FAILED)) + return; + + if (unit_will_restart(u)) + return; + + if (UNIT_VTABLE(u)->release_resources) + UNIT_VTABLE(u)->release_resources(u); +} + bool unit_may_gc(Unit *u) { UnitActiveState state; int r; assert(u); - /* Checks whether the unit is ready to be unloaded for garbage collection. - * Returns true when the unit may be collected, and false if there's some - * reason to keep it loaded. + /* Checks whether the unit is ready to be unloaded for garbage collection. Returns true when the + * unit may be collected, and false if there's some reason to keep it loaded. * - * References from other units are *not* checked here. Instead, this is done - * in unit_gc_sweep(), but using markers to properly collect dependency loops. + * References from other units are *not* checked here. Instead, this is done in unit_gc_sweep(), but + * using markers to properly collect dependency loops. */ if (u->job || u->nop_job) return false; - state = unit_active_state(u); - - /* If the unit is inactive and failed and no job is queued for it, then release its runtime resources */ - if (UNIT_IS_INACTIVE_OR_FAILED(state) && - UNIT_VTABLE(u)->release_resources) - UNIT_VTABLE(u)->release_resources(u); - if (u->perpetual) return false; if (sd_bus_track_count(u->bus_track) > 0) return false; - /* But we keep the unit object around for longer when it is referenced or configured to not be gc'ed */ + state = unit_active_state(u); + + /* But we keep the unit object around for longer when it is referenced or configured to not be + * gc'ed */ switch (u->collect_mode) { case COLLECT_INACTIVE: @@ -458,10 +475,10 @@ bool unit_may_gc(Unit *u) { return false; } - if (UNIT_VTABLE(u)->may_gc && !UNIT_VTABLE(u)->may_gc(u)) - return false; + if (!UNIT_VTABLE(u)->may_gc) + return true; - return true; + return UNIT_VTABLE(u)->may_gc(u); } void unit_add_to_load_queue(Unit *u) { @@ -565,6 +582,25 @@ void unit_submit_to_stop_when_bound_queue(Unit *u) { u->in_stop_when_bound_queue = true; } +void unit_submit_to_release_resources_queue(Unit *u) { + assert(u); + + if (u->in_release_resources_queue) + return; + + if (u->job || u->nop_job) + return; + + if (u->perpetual) + return; + + if (!UNIT_VTABLE(u)->release_resources) + return; + + LIST_PREPEND(release_resources_queue, u->manager->release_resources_queue, u); + u->in_release_resources_queue = true; +} + static void unit_clear_dependencies(Unit *u) { assert(u); @@ -781,6 +817,9 @@ Unit* unit_free(Unit *u) { if (u->in_stop_when_bound_queue) LIST_REMOVE(stop_when_bound_queue, u->manager->stop_when_bound_queue, u); + if (u->in_release_resources_queue) + LIST_REMOVE(release_resources_queue, u->manager->release_resources_queue, u); + bpf_firewall_close(u); hashmap_free(u->bpf_foreign_by_key); @@ -2567,7 +2606,6 @@ static bool unit_process_job(Job *j, UnitActiveState ns, UnitNotifyFlags flags) assert(j); if (j->state == JOB_WAITING) - /* So we reached a different state for this job. Let's see if we can run it now if it failed previously * due to EAGAIN. */ job_add_to_run_queue(j); @@ -2771,6 +2809,9 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag /* Maybe the unit should be GC'ed now? */ unit_add_to_gc_queue(u); + + /* Maybe we can release some resources now? */ + unit_submit_to_release_resources_queue(u); } if (UNIT_IS_ACTIVE_OR_RELOADING(ns)) { diff --git a/src/core/unit.h b/src/core/unit.h index 420405b2b77..b13ac33018a 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -317,6 +317,9 @@ typedef struct Unit { /* Queue of units that have a BindTo= dependency on some other unit, and should possibly be shut down */ LIST_FIELDS(Unit, stop_when_bound_queue); + /* Queue of units that should be checked if they can release resources now */ + LIST_FIELDS(Unit, release_resources_queue); + /* PIDs we keep an eye on. Note that a unit might have many * more, but these are the ones we care enough about to * process SIGCHLD for */ @@ -479,6 +482,7 @@ typedef struct Unit { bool in_stop_when_unneeded_queue:1; bool in_start_when_upheld_queue:1; bool in_stop_when_bound_queue:1; + bool in_release_resources_queue:1; bool sent_dbus_new_signal:1; @@ -832,6 +836,8 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c); int unit_choose_id(Unit *u, const char *name); int unit_set_description(Unit *u, const char *description); +void unit_release_resources(Unit *u); + bool unit_may_gc(Unit *u); static inline bool unit_is_extrinsic(Unit *u) { @@ -853,6 +859,7 @@ void unit_add_to_target_deps_queue(Unit *u); void unit_submit_to_stop_when_unneeded_queue(Unit *u); void unit_submit_to_start_when_upheld_queue(Unit *u); void unit_submit_to_stop_when_bound_queue(Unit *u); +void unit_submit_to_release_resources_queue(Unit *u); int unit_merge(Unit *u, Unit *other); int unit_merge_by_name(Unit *u, const char *other);