From: Luca Boccassi Date: Wed, 12 Apr 2023 19:14:17 +0000 (+0100) Subject: Uphold/StopWhenUnneeded/BindsTo: add retry timer on rate limit X-Git-Tag: v254-rc1~724^2~1 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7223d500ac548c69e7879931e3ad8c84838f925b;p=thirdparty%2Fsystemd.git Uphold/StopWhenUnneeded/BindsTo: add retry timer on rate limit The Upholds= promise is that as long as unit A is up and Upholds=B, B will be activated if failed or inactive. But there is a hard-coded, non-configurable rate limit for this, so add a timed retry after the ratelimit has expired. Apply to BindsTo= and StopWhenUnneeded= as well. --- diff --git a/src/basic/ratelimit.c b/src/basic/ratelimit.c index f90a63b1a90..a0260bfe1c4 100644 --- a/src/basic/ratelimit.c +++ b/src/basic/ratelimit.c @@ -38,3 +38,12 @@ unsigned ratelimit_num_dropped(RateLimit *r) { return r->num > r->burst ? r->num - r->burst : 0; } + +usec_t ratelimit_end(const RateLimit *rl) { + assert(rl); + + if (rl->begin == 0) + return 0; + + return usec_add(rl->begin, rl->interval); +} diff --git a/src/basic/ratelimit.h b/src/basic/ratelimit.h index 2236189851e..048084ece49 100644 --- a/src/basic/ratelimit.h +++ b/src/basic/ratelimit.h @@ -23,3 +23,5 @@ static inline bool ratelimit_configured(RateLimit *rl) { bool ratelimit_below(RateLimit *r); unsigned ratelimit_num_dropped(RateLimit *r); + +usec_t ratelimit_end(const RateLimit *rl); diff --git a/src/core/manager.c b/src/core/manager.c index 47a502df7fb..fd576ef25bb 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1389,6 +1389,48 @@ static unsigned manager_dispatch_gc_job_queue(Manager *m) { return n; } +static int manager_ratelimit_requeue(sd_event_source *s, uint64_t usec, void *userdata) { + Unit *u = userdata; + + assert(u); + assert(s == u->auto_start_stop_event_source); + + u->auto_start_stop_event_source = sd_event_source_unref(u->auto_start_stop_event_source); + + /* Re-queue to all queues, if the rate limit hit we might have been throttled on any of them. */ + unit_submit_to_stop_when_unneeded_queue(u); + unit_submit_to_start_when_upheld_queue(u); + unit_submit_to_stop_when_bound_queue(u); + + return 0; +} + +static int manager_ratelimit_check_and_queue(Unit *u) { + int r; + + assert(u); + + if (ratelimit_below(&u->auto_start_stop_ratelimit)) + return 1; + + /* Already queued, no need to requeue */ + if (u->auto_start_stop_event_source) + return 0; + + r = sd_event_add_time( + u->manager->event, + &u->auto_start_stop_event_source, + CLOCK_MONOTONIC, + ratelimit_end(&u->auto_start_stop_ratelimit), + 0, + manager_ratelimit_requeue, + u); + if (r < 0) + return log_unit_error_errno(u, r, "Failed to queue timer on event loop: %m"); + + return 0; +} + static unsigned manager_dispatch_stop_when_unneeded_queue(Manager *m) { unsigned n = 0; Unit *u; @@ -1413,8 +1455,11 @@ static unsigned manager_dispatch_stop_when_unneeded_queue(Manager *m) { /* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the * service being unnecessary after a while. */ - if (!ratelimit_below(&u->auto_start_stop_ratelimit)) { - log_unit_warning(u, "Unit not needed anymore, but not stopping since we tried this too often recently."); + r = manager_ratelimit_check_and_queue(u); + if (r <= 0) { + log_unit_warning(u, + "Unit not needed anymore, but not stopping since we tried this too often recently.%s", + r == 0 ? " Will retry later." : ""); continue; } @@ -1452,8 +1497,12 @@ static unsigned manager_dispatch_start_when_upheld_queue(Manager *m) { /* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the * service being unnecessary after a while. */ - if (!ratelimit_below(&u->auto_start_stop_ratelimit)) { - log_unit_warning(u, "Unit needs to be started because active unit %s upholds it, but not starting since we tried this too often recently.", culprit->id); + r = manager_ratelimit_check_and_queue(u); + if (r <= 0) { + log_unit_warning(u, + "Unit needs to be started because active unit %s upholds it, but not starting since we tried this too often recently.%s", + culprit->id, + r == 0 ? " Will retry later." : ""); continue; } @@ -1490,8 +1539,12 @@ static unsigned manager_dispatch_stop_when_bound_queue(Manager *m) { /* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the * service being unnecessary after a while. */ - if (!ratelimit_below(&u->auto_start_stop_ratelimit)) { - log_unit_warning(u, "Unit needs to be stopped because it is bound to inactive unit %s it, but not stopping since we tried this too often recently.", culprit->id); + r = manager_ratelimit_check_and_queue(u); + if (r <= 0) { + log_unit_warning(u, + "Unit needs to be stopped because it is bound to inactive unit %s it, but not stopping since we tried this too often recently.%s", + culprit->id, + r == 0 ? " Will retry later." : ""); continue; } diff --git a/src/core/unit.c b/src/core/unit.c index 409801aed28..f3566a56729 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -676,6 +676,8 @@ Unit* unit_free(Unit *u) { if (!u) return NULL; + sd_event_source_disable_unref(u->auto_start_stop_event_source); + u->transient_file = safe_fclose(u->transient_file); if (!MANAGER_IS_RELOADING(u->manager)) diff --git a/src/core/unit.h b/src/core/unit.h index 420405b2b77..dd0d42dc909 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -347,6 +347,7 @@ typedef struct Unit { /* Make sure we never enter endless loops with the StopWhenUnneeded=, BindsTo=, Uphold= logic */ RateLimit auto_start_stop_ratelimit; + sd_event_source *auto_start_stop_event_source; /* Reference to a specific UID/GID */ uid_t ref_uid;