]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
master: Fix service { idle_kill } to work better on busy servers
authorTimo Sirainen <timo.sirainen@open-xchange.com>
Thu, 23 Mar 2023 20:23:39 +0000 (22:23 +0200)
committerTimo Sirainen <timo.sirainen@open-xchange.com>
Thu, 11 May 2023 09:15:06 +0000 (12:15 +0300)
The previous behavior was to kill a process once it had idled for idle_kill
seconds. However, on a busy server the new connections are picked up somewhat
randomly by all the idling processes, so there's never any single process
idling for a long time. This effectively prevents the idle_kill from killing
any processes, even if there are unnecessarily many of them.

The new behavior here tracks the lowest number of idling processes during
idle_kill time interval. Then it kills that many processes. If the load
stays the same, this should shrink the number of processes to the number
that is needed to handle the load, but no more.

src/master/service-monitor.c
src/master/service-process.c
src/master/service.h

index ddfea6085119d4afeae69fa022e39246f7b0c3aa..0a65733a877926bda338d97d5dc4ac7d5d30b150 100644 (file)
@@ -49,35 +49,37 @@ static void service_process_idle_kill_timeout(struct service_process *process)
 
 static void service_kill_idle(struct service *service)
 {
-       /* Kill processes which have idled over idle_kill seconds.
+       /* Kill extra idling processes to reduce their number. The idea here is
+          that if the load stays the same, by killing the lowwater number of
+          processes there won't be any extra idling processes left. */
+       unsigned int processes_to_kill =
+               service->process_idling_lowwater_since_kills;
+       service->process_idling_lowwater_since_kills = service->process_idling;
+
+       /* Always try to leave process_min_avail processes */
+       i_assert(processes_to_kill <= service->process_avail);
+       if (processes_to_kill <= service->set->process_min_avail) {
+               if (service->process_idling == 0)
+                       timeout_remove(&service->to_idle);
+               return;
+       }
+       processes_to_kill -= service->set->process_min_avail;
+
+       /* Now, kill the processes with the oldest idle_start time.
 
           (It's actually not important which processes get killed. A better
           way could be to kill the oldest processes since they might have to
           be restarted anyway soon due to reaching service_count, but we'd
           have to use priority queue for tracking that, which is more
           expensive and probably not worth it.) */
-       if (service->idle_processes_head == NULL) {
-               /* no idling processes to kill */
-               timeout_remove(&service->to_idle);
-               return;
-       }
-       if (service->process_avail <= service->set->process_min_avail) {
-               /* we don't have any extra idling processes anymore. */
-               timeout_remove(&service->to_idle);
-               return;
-       }
-       time_t newest_time_to_kill = ioloop_time - service->idle_kill;
-       unsigned int max_processes_to_kill =
-               service->process_avail - service->set->process_min_avail;
-       for (; max_processes_to_kill > 0; max_processes_to_kill--) {
+       for (; processes_to_kill > 0; processes_to_kill--) {
                struct service_process *process = service->idle_processes_head;
 
+               i_assert(process != NULL);
                if (process->to_idle_kill != NULL) {
                        /* already tried to kill all the idle processes */
                        break;
                }
-               if (process->idle_start > newest_time_to_kill)
-                       break;
 
                i_assert(process->available_count == service->client_limit);
                if (kill(process->pid, SIGINT) < 0 && errno != ESRCH) {
@@ -112,6 +114,9 @@ static void service_status_more(struct service_process *process,
 
                i_assert(service->process_idling > 0);
                service->process_idling--;
+               service->process_idling_lowwater_since_kills =
+                       I_MIN(service->process_idling_lowwater_since_kills,
+                             service->process_idling);
        }
        process->total_count +=
                process->available_count - status->available_count;
index 0de515572b339f1a38038447493a81e427a237bb..3d830d591101dffb43eab3023b97a345ff34b7be 100644 (file)
@@ -398,6 +398,9 @@ void service_process_destroy(struct service_process *process)
                               &service->idle_processes_tail, process);
                i_assert(service->process_idling > 0);
                service->process_idling--;
+               service->process_idling_lowwater_since_kills =
+                       I_MIN(service->process_idling_lowwater_since_kills,
+                             service->process_idling);
        }
        hash_table_remove(service_pids, POINTER_CAST(process->pid));
 
index 8287665d48f42de66b91df9fb7bab3824734fd42..c6ea604e21cc5b0940fb2d3cee371f92a9f7de91 100644 (file)
@@ -71,6 +71,9 @@ struct service {
        unsigned int process_avail;
        /* number of processes currently idling (idle_start != 0) */
        unsigned int process_idling;
+       /* Lowest number of processes that have been idling at the same time.
+          This is reset to process_idling every idle_kill seconds. */
+       unsigned int process_idling_lowwater_since_kills;
        /* max number of processes allowed */
        unsigned int process_limit;
        /* Total number of processes ever created */