]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
master: Fix service { idle_kill } to work better on busy servers
authorTimo Sirainen <timo.sirainen@open-xchange.com>
Thu, 23 Mar 2023 20:23:39 +0000 (22:23 +0200)
committeraki.tuomi <aki.tuomi@open-xchange.com>
Fri, 31 Mar 2023 10:12:57 +0000 (10:12 +0000)
The previous behavior was to kill a process once it had idled for idle_kill
seconds. However, on a busy server the new connections are picked up somewhat
randomly by all the idling processes, so there's never any single process
idling for a long time. This effectively prevents the idle_kill from killing
any processes, even if there are unnecessarily many of them.

The new behavior here tracks the lowest number of idling processes during
idle_kill time interval. Then it kills that many processes. If the load
stays the same, this should shrink the number of processes to the number
that is needed to handle the load, but no more.

src/master/service-monitor.c
src/master/service-process.c
src/master/service.h

index 010cd450e40380826440fa12f7661eeab7d191ac..e386e816dced309e47b7e6eef0f5a1295eb2d7a0 100644 (file)
@@ -50,35 +50,37 @@ static void service_process_idle_kill_timeout(struct service_process *process)
 
 static void service_kill_idle(struct service *service)
 {
-       /* Kill processes which have idled over idle_kill seconds.
+       /* Kill extra idling processes to reduce their number. The idea here is
+          that if the load stays the same, by killing the lowwater number of
+          processes there won't be any extra idling processes left. */
+       unsigned int processes_to_kill =
+               service->process_idling_lowwater_since_kills;
+       service->process_idling_lowwater_since_kills = service->process_idling;
+
+       /* Always try to leave process_min_avail processes */
+       i_assert(processes_to_kill <= service->process_avail);
+       if (processes_to_kill <= service->set->process_min_avail) {
+               if (service->process_idling == 0)
+                       timeout_remove(&service->to_idle);
+               return;
+       }
+       processes_to_kill -= service->set->process_min_avail;
+
+       /* Now, kill the processes with the oldest idle_start time.
 
           (It's actually not important which processes get killed. A better
           way could be to kill the oldest processes since they might have to
           be restarted anyway soon due to reaching service_count, but we'd
           have to use priority queue for tracking that, which is more
           expensive and probably not worth it.) */
-       if (service->idle_processes_head == NULL) {
-               /* no idling processes to kill */
-               timeout_remove(&service->to_idle);
-               return;
-       }
-       if (service->process_avail <= service->set->process_min_avail) {
-               /* we don't have any extra idling processes anymore. */
-               timeout_remove(&service->to_idle);
-               return;
-       }
-       time_t newest_time_to_kill = ioloop_time - service->idle_kill;
-       unsigned int max_processes_to_kill =
-               service->process_avail - service->set->process_min_avail;
-       for (; max_processes_to_kill > 0; max_processes_to_kill--) {
+       for (; processes_to_kill > 0; processes_to_kill--) {
                struct service_process *process = service->idle_processes_head;
 
+               i_assert(process != NULL);
                if (process->to_idle_kill != NULL) {
                        /* already tried to kill all the idle processes */
                        break;
                }
-               if (process->idle_start > newest_time_to_kill)
-                       break;
 
                i_assert(process->available_count == service->client_limit);
                if (kill(process->pid, SIGINT) < 0 && errno != ESRCH) {
@@ -113,6 +115,9 @@ static void service_status_more(struct service_process *process,
 
                i_assert(service->process_idling > 0);
                service->process_idling--;
+               service->process_idling_lowwater_since_kills =
+                       I_MIN(service->process_idling_lowwater_since_kills,
+                             service->process_idling);
        }
        process->total_count +=
                process->available_count - status->available_count;
index ca2667a5de25201fdb48c7e32e52d30c0b2e29bc..43d6cf808abf962edd730b692948016fb605745c 100644 (file)
@@ -470,6 +470,9 @@ void service_process_destroy(struct service_process *process)
                               &service->idle_processes_tail, process);
                i_assert(service->process_idling > 0);
                service->process_idling--;
+               service->process_idling_lowwater_since_kills =
+                       I_MIN(service->process_idling_lowwater_since_kills,
+                             service->process_idling);
        }
        hash_table_remove(service_pids, POINTER_CAST(process->pid));
 
index c1e045df0d6623abe42bf82d23c4631416de2b6e..1cd6e3cfbd422ccf58ef3a96a57ae6a29f2b97ef 100644 (file)
@@ -74,6 +74,9 @@ struct service {
        unsigned int process_avail;
        /* number of processes currently idling (idle_start != 0) */
        unsigned int process_idling;
+       /* Lowest number of processes that have been idling at the same time.
+          This is reset to process_idling every idle_kill seconds. */
+       unsigned int process_idling_lowwater_since_kills;
        /* max number of processes allowed */
        unsigned int process_limit;
        /* Total number of processes ever created */