git.ipfire.org Git - thirdparty/systemd.git/commitdiff
userdbd: when we hit a flood of requests to start more workers, don't exit 28129/head
authorLennart Poettering <lennart@poettering.net>
Thu, 22 Jun 2023 16:51:34 +0000 (18:51 +0200)
committerLennart Poettering <lennart@poettering.net>
Thu, 22 Jun 2023 16:59:24 +0000 (18:59 +0200)
Let's tweak what we do if we detect a flood of requests to start more
workers: if none of the workers ever sticks (i.e. the worker count is
zero) then let's just give up, as before.

Otherwise, let's just not start more workers for a while, and do so
again after a while. Thus spawning of workers will "cool off" for a
while.

Fixes: #27028
src/userdb/userdbd-manager.c
src/userdb/userdbd-manager.h

index 372de720f782b5ec6b2cc013a8e2ca6f8441f766..73da3fb1e8ca201117c670fbe7b92b0d001ba26f 100644 (file)
@@ -53,6 +53,17 @@ static int on_sigusr2(sd_event_source *s, const struct signalfd_siginfo *si, voi
         return 0;
 }
 
+static int on_deferred_start_worker(sd_event_source *s, uint64_t usec, void *userdata) { /* Timer callback: retries worker spawning after the rate-limit cool-off. */
+        Manager *m = ASSERT_PTR(userdata);
+        /* 'usec' is not used here; 'userdata' is the Manager handed over when the timer was registered. */
+        assert(s);
+        /* Drop the timer source before retrying: start_workers() only arms a new timer while this field is unset (sd_event_source_unref() is documented to return NULL). */
+        m->deferred_start_worker_event_source = sd_event_source_unref(m->deferred_start_worker_event_source);
+        /* The result of start_workers() is deliberately discarded via the (void) cast; this handler always returns 0. */
+        (void) start_workers(m, /* explicit_request= */ false);
+        return 0;
+}
+
 DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(
                 event_source_hash_ops,
                 sd_event_source,
@@ -111,6 +122,8 @@ Manager* manager_free(Manager *m) {
         set_free(m->workers_fixed);
         set_free(m->workers_dynamic);
 
+        m->deferred_start_worker_event_source = sd_event_source_unref(m->deferred_start_worker_event_source);
+
         sd_event_unref(m->event);
 
         return mfree(m);
@@ -213,10 +226,31 @@ static int start_workers(Manager *m, bool explicit_request) {
                         break;
 
                 if (!ratelimit_below(&m->worker_ratelimit)) {
-                        /* If we keep starting workers too often, let's fail the whole daemon, something is wrong */
-                        sd_event_exit(m->event, EXIT_FAILURE);
 
-                        return log_error_errno(SYNTHETIC_ERRNO(EUCLEAN), "Worker threads requested too frequently, something is wrong.");
+                        /* If we keep starting workers too often but none sticks, let's fail the whole
+                         * daemon, something is wrong */
+                        if (n == 0) {
+                                sd_event_exit(m->event, EXIT_FAILURE);
+                                return log_error_errno(SYNTHETIC_ERRNO(EUCLEAN), "Worker threads requested too frequently, but worker count is zero, something is wrong.");
+                        }
+
+                        /* Otherwise, let's stop spawning more for a while. */
+                        log_warning("Worker threads requested too frequently, not starting new ones for a while.");
+
+                        if (!m->deferred_start_worker_event_source) {
+                                r = sd_event_add_time(
+                                                m->event,
+                                                &m->deferred_start_worker_event_source,
+                                                CLOCK_MONOTONIC,
+                                                ratelimit_end(&m->worker_ratelimit),
+                                                /* accuracy_usec= */ 0,
+                                                on_deferred_start_worker,
+                                                m);
+                                if (r < 0)
+                                        return log_error_errno(r, "Failed to allocate deferred start worker event source: %m");
+                        }
+
+                        break;
                 }
 
                 r = start_one_worker(m);
index 4fb45e6c356ddd293992b4736ad37b89b3d72315..c39f79d25ca51a19af7c9227f1dde212c7c14020 100644 (file)
@@ -21,6 +21,8 @@ struct Manager {
         int listen_fd;
 
         RateLimit worker_ratelimit;
+
+        sd_event_source *deferred_start_worker_event_source;
 };
 
 int manager_new(Manager **ret);