From: Timo Sirainen Date: Thu, 23 Mar 2023 20:23:39 +0000 (+0200) Subject: master: Fix service { idle_kill } to work better on busy servers X-Git-Tag: 2.4.0~2822 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=038f182c5238570a781212fe2528245e303305d7;p=thirdparty%2Fdovecot%2Fcore.git master: Fix service { idle_kill } to work better on busy servers The previous behavior was to kill a process once it had idled for idle_kill seconds. However, on a busy server the new connections are picked up somewhat randomly by all the idling processes, so there's never any single process idling for a long time. This effectively prevents the idle_kill from killing any processes, even if there are unnecessarily many of them. The new behavior here tracks the lowest number of idling processes during idle_kill time interval. Then it kills that many processes. If the load stays the same, this should shrink the number of processes to the number that is needed to handle the load, but no more. --- diff --git a/src/master/service-monitor.c b/src/master/service-monitor.c index 010cd450e4..e386e816dc 100644 --- a/src/master/service-monitor.c +++ b/src/master/service-monitor.c @@ -50,35 +50,37 @@ static void service_process_idle_kill_timeout(struct service_process *process) static void service_kill_idle(struct service *service) { - /* Kill processes which have idled over idle_kill seconds. + /* Kill extra idling processes to reduce their number. The idea here is + that if the load stays the same, by killing the lowwater number of + processes there won't be any extra idling processes left. */ + unsigned int processes_to_kill = + service->process_idling_lowwater_since_kills; + service->process_idling_lowwater_since_kills = service->process_idling; + + /* Always try to leave process_min_avail processes */ + i_assert(processes_to_kill <= service->process_avail); + if (processes_to_kill <= service->set->process_min_avail) { + if (service->process_idling == 0) + timeout_remove(&service->to_idle); + return; + } + processes_to_kill -= service->set->process_min_avail; + + /* Now, kill the processes with the oldest idle_start time. (It's actually not important which processes get killed. A better way could be to kill the oldest processes since they might have to be restarted anyway soon due to reaching service_count, but we'd have to use priority queue for tracking that, which is more expensive and probably not worth it.) */ - if (service->idle_processes_head == NULL) { - /* no idling processes to kill */ - timeout_remove(&service->to_idle); - return; - } - if (service->process_avail <= service->set->process_min_avail) { - /* we don't have any extra idling processes anymore. */ - timeout_remove(&service->to_idle); - return; - } - time_t newest_time_to_kill = ioloop_time - service->idle_kill; - unsigned int max_processes_to_kill = - service->process_avail - service->set->process_min_avail; - for (; max_processes_to_kill > 0; max_processes_to_kill--) { + for (; processes_to_kill > 0; processes_to_kill--) { struct service_process *process = service->idle_processes_head; + i_assert(process != NULL); if (process->to_idle_kill != NULL) { /* already tried to kill all the idle processes */ break; } - if (process->idle_start > newest_time_to_kill) - break; i_assert(process->available_count == service->client_limit); if (kill(process->pid, SIGINT) < 0 && errno != ESRCH) { @@ -113,6 +115,9 @@ static void service_status_more(struct service_process *process, i_assert(service->process_idling > 0); service->process_idling--; + service->process_idling_lowwater_since_kills = + I_MIN(service->process_idling_lowwater_since_kills, + service->process_idling); } process->total_count += process->available_count - status->available_count; diff --git a/src/master/service-process.c b/src/master/service-process.c index ca2667a5de..43d6cf808a 100644 --- a/src/master/service-process.c +++ b/src/master/service-process.c @@ -470,6 +470,9 @@ void service_process_destroy(struct service_process *process) &service->idle_processes_tail, process); i_assert(service->process_idling > 0); service->process_idling--; + service->process_idling_lowwater_since_kills = + I_MIN(service->process_idling_lowwater_since_kills, + service->process_idling); } hash_table_remove(service_pids, POINTER_CAST(process->pid)); diff --git a/src/master/service.h b/src/master/service.h index c1e045df0d..1cd6e3cfbd 100644 --- a/src/master/service.h +++ b/src/master/service.h @@ -74,6 +74,9 @@ struct service { unsigned int process_avail; /* number of processes currently idling (idle_start != 0) */ unsigned int process_idling; + /* Lowest number of processes that have been idling at the same time. + This is reset to process_idling every idle_kill seconds. */ + unsigned int process_idling_lowwater_since_kills; /* max number of processes allowed */ unsigned int process_limit; /* Total number of processes ever created */