]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
Uphold/StopWhenUnneeded/BindsTo: add retry timer on rate limit
authorLuca Boccassi <bluca@debian.org>
Wed, 12 Apr 2023 19:14:17 +0000 (20:14 +0100)
committerLuca Boccassi <bluca@debian.org>
Wed, 12 Apr 2023 20:49:48 +0000 (21:49 +0100)
The Upholds= promise is that as long as unit A is up and Upholds=B,
B will be activated if failed or inactive. But there is a hard-coded,
non-configurable rate limit for this, so add a timed retry after the
ratelimit has expired.

Apply to BindsTo= and StopWhenUnneeded= as well.

src/basic/ratelimit.c
src/basic/ratelimit.h
src/core/manager.c
src/core/unit.c
src/core/unit.h

index f90a63b1a90882c09b0dd49db08408352d4c251d..a0260bfe1c42e997a91c09fd0316eec02b1d8547 100644 (file)
@@ -38,3 +38,12 @@ unsigned ratelimit_num_dropped(RateLimit *r) {
 
         return r->num > r->burst ? r->num - r->burst : 0;
 }
+
+usec_t ratelimit_end(const RateLimit *rl) {
+        assert(rl);
+
+        if (rl->begin == 0)
+                return 0;
+
+        return usec_add(rl->begin, rl->interval);
+}
index 2236189851e6fcbf2167826daa9a41e8a9f525c4..048084ece49b42bebb765afb5e95bcc8f47a41ba 100644 (file)
@@ -23,3 +23,5 @@ static inline bool ratelimit_configured(RateLimit *rl) {
 bool ratelimit_below(RateLimit *r);
 
 unsigned ratelimit_num_dropped(RateLimit *r);
+
+usec_t ratelimit_end(const RateLimit *rl);
index 47a502df7fb4592d6fe7950d09c23c679db816de..fd576ef25bbc2c18a9dcce60c8aa633d3f46859f 100644 (file)
@@ -1389,6 +1389,48 @@ static unsigned manager_dispatch_gc_job_queue(Manager *m) {
         return n;
 }
 
+static int manager_ratelimit_requeue(sd_event_source *s, uint64_t usec, void *userdata) {
+        Unit *u = userdata;
+
+        assert(u);
+        assert(s == u->auto_start_stop_event_source);
+
+        u->auto_start_stop_event_source = sd_event_source_unref(u->auto_start_stop_event_source);
+
+        /* Re-queue to all queues, if the rate limit hit we might have been throttled on any of them. */
+        unit_submit_to_stop_when_unneeded_queue(u);
+        unit_submit_to_start_when_upheld_queue(u);
+        unit_submit_to_stop_when_bound_queue(u);
+
+        return 0;
+}
+
+static int manager_ratelimit_check_and_queue(Unit *u) {
+        int r;
+
+        assert(u);
+
+        if (ratelimit_below(&u->auto_start_stop_ratelimit))
+                return 1;
+
+        /* Already queued, no need to requeue */
+        if (u->auto_start_stop_event_source)
+                return 0;
+
+        r = sd_event_add_time(
+                        u->manager->event,
+                        &u->auto_start_stop_event_source,
+                        CLOCK_MONOTONIC,
+                        ratelimit_end(&u->auto_start_stop_ratelimit),
+                        0,
+                        manager_ratelimit_requeue,
+                        u);
+        if (r < 0)
+                return log_unit_error_errno(u, r, "Failed to queue timer on event loop: %m");
+
+        return 0;
+}
+
 static unsigned manager_dispatch_stop_when_unneeded_queue(Manager *m) {
         unsigned n = 0;
         Unit *u;
@@ -1413,8 +1455,11 @@ static unsigned manager_dispatch_stop_when_unneeded_queue(Manager *m) {
                 /* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the
                  * service being unnecessary after a while. */
 
-                if (!ratelimit_below(&u->auto_start_stop_ratelimit)) {
-                        log_unit_warning(u, "Unit not needed anymore, but not stopping since we tried this too often recently.");
+                r = manager_ratelimit_check_and_queue(u);
+                if (r <= 0) {
+                        log_unit_warning(u,
+                                         "Unit not needed anymore, but not stopping since we tried this too often recently.%s",
+                                         r == 0 ? " Will retry later." : "");
                         continue;
                 }
 
@@ -1452,8 +1497,12 @@ static unsigned manager_dispatch_start_when_upheld_queue(Manager *m) {
                 /* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the
                  * service being unnecessary after a while. */
 
-                if (!ratelimit_below(&u->auto_start_stop_ratelimit)) {
-                        log_unit_warning(u, "Unit needs to be started because active unit %s upholds it, but not starting since we tried this too often recently.", culprit->id);
+                r = manager_ratelimit_check_and_queue(u);
+                if (r <= 0) {
+                        log_unit_warning(u,
+                                         "Unit needs to be started because active unit %s upholds it, but not starting since we tried this too often recently.%s",
+                                         culprit->id,
+                                         r == 0 ? " Will retry later." : "");
                         continue;
                 }
 
@@ -1490,8 +1539,12 @@ static unsigned manager_dispatch_stop_when_bound_queue(Manager *m) {
                 /* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the
                  * service being unnecessary after a while. */
 
-                if (!ratelimit_below(&u->auto_start_stop_ratelimit)) {
-                        log_unit_warning(u, "Unit needs to be stopped because it is bound to inactive unit %s it, but not stopping since we tried this too often recently.", culprit->id);
+                r = manager_ratelimit_check_and_queue(u);
+                if (r <= 0) {
+                        log_unit_warning(u,
+                                         "Unit needs to be stopped because it is bound to inactive unit %s it, but not stopping since we tried this too often recently.%s",
+                                         culprit->id,
+                                         r == 0 ? " Will retry later." : "");
                         continue;
                 }
 
index 409801aed2888828cc2ab11b629efbf4736d38fd..f3566a5672983362c22392cbf03a66cccd0ed4ab 100644 (file)
@@ -676,6 +676,8 @@ Unit* unit_free(Unit *u) {
         if (!u)
                 return NULL;
 
+        sd_event_source_disable_unref(u->auto_start_stop_event_source);
+
         u->transient_file = safe_fclose(u->transient_file);
 
         if (!MANAGER_IS_RELOADING(u->manager))
index 420405b2b7707f912e5fa094eea101e33d6881be..dd0d42dc909411d5847783659596670fbd4555ff 100644 (file)
@@ -347,6 +347,7 @@ typedef struct Unit {
 
         /* Make sure we never enter endless loops with the StopWhenUnneeded=, BindsTo=, Uphold= logic */
         RateLimit auto_start_stop_ratelimit;
+        sd_event_source *auto_start_stop_event_source;
 
         /* Reference to a specific UID/GID */
         uid_t ref_uid;