From: Timo Sirainen Date: Thu, 23 Mar 2023 14:07:26 +0000 (+0200) Subject: master: Replace per-process idle_kill timeout with per-service X-Git-Tag: 2.3.21~81 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d0e74ddac0fa471d13b5e4f70e6f33ac2379cf06;p=thirdparty%2Fdovecot%2Fcore.git master: Replace per-process idle_kill timeout with per-service This is much more efficient, since it doesn't have to keep updating the timeout constantly for busy processes. This change still preserves the original way the idle_kill setting behaves. --- diff --git a/src/master/service-monitor.c b/src/master/service-monitor.c index b035d742b6..ddfea60851 100644 --- a/src/master/service-monitor.c +++ b/src/master/service-monitor.c @@ -34,30 +34,67 @@ static void service_status_more(struct service_process *process, const struct master_status *status); static void service_monitor_listen_start_force(struct service *service); -static void service_process_kill_idle(struct service_process *process) +static void service_process_idle_kill_timeout(struct service_process *process) { - struct service *service = process->service; struct master_status status; - i_assert(process->available_count == service->client_limit); + service_error(process->service, "Process %s is ignoring idle SIGINT", + dec2str(process->pid)); + + /* assume this process is busy */ + i_zero(&status); + service_status_more(process, &status); + process->available_count = 0; +} +static void service_kill_idle(struct service *service) +{ + /* Kill processes which have idled over idle_kill seconds. + + (It's actually not important which processes get killed. A better + way could be to kill the oldest processes since they might have to + be restarted anyway soon due to reaching service_count, but we'd + have to use priority queue for tracking that, which is more + expensive and probably not worth it.) */ + if (service->idle_processes_head == NULL) { + /* no idling processes to kill */ + timeout_remove(&service->to_idle); + return; + } if (service->process_avail <= service->set->process_min_avail) { /* we don't have any extra idling processes anymore. */ - timeout_remove(&process->to_idle); - } else if (process->last_kill_sent > process->last_status_update+1) { - service_error(service, "Process %s is ignoring idle SIGINT", - dec2str(process->pid)); - - /* assume this process is busy */ - i_zero(&status); - service_status_more(process, &status); - process->available_count = 0; - } else { + timeout_remove(&service->to_idle); + return; + } + time_t newest_time_to_kill = ioloop_time - service->idle_kill; + unsigned int max_processes_to_kill = + service->process_avail - service->set->process_min_avail; + for (; max_processes_to_kill > 0; max_processes_to_kill--) { + struct service_process *process = service->idle_processes_head; + + if (process->to_idle_kill != NULL) { + /* already tried to kill all the idle processes */ + break; + } + if (process->idle_start > newest_time_to_kill) + break; + + i_assert(process->available_count == service->client_limit); if (kill(process->pid, SIGINT) < 0 && errno != ESRCH) { service_error(service, "kill(%s, SIGINT) failed: %m", dec2str(process->pid)); } process->last_kill_sent = ioloop_time; + process->to_idle_kill = + timeout_add(service->idle_kill * 1000, + service_process_idle_kill_timeout, process); + + /* Move it to the end of the list, so it's not tried to be + killed again. */ + DLLIST2_REMOVE(&service->idle_processes_head, + &service->idle_processes_tail, process); + DLLIST2_APPEND(&service->idle_processes_head, + &service->idle_processes_tail, process); } } @@ -79,8 +116,6 @@ static void service_status_more(struct service_process *process, process->total_count += process->available_count - status->available_count; - timeout_remove(&process->to_idle); - if (status->available_count != 0) return; @@ -120,16 +155,13 @@ static void service_check_idle(struct service_process *process) process->idle_start = ioloop_time; if (service->process_avail > service->set->process_min_avail && - process->to_idle == NULL && + service->to_idle == NULL && service->idle_kill != UINT_MAX) { - /* we have more processes than we really need. - add a bit of randomness so that we don't send the - signal to all of them at once */ - process->to_idle = - timeout_add((service->idle_kill * 1000) + - i_rand_limit(100) * 10, - service_process_kill_idle, - process); + /* We have more processes than we really need. Start a timer + to trigger idle_kill. */ + service->to_idle = + timeout_add(service->idle_kill * 1000, + service_kill_idle, service); } } @@ -177,6 +209,8 @@ service_status_input_one(struct service *service, return; } process->last_status_update = ioloop_time; + /* process worked a bit - it may have ignored idle-kill signal */ + timeout_remove(&process->to_idle_kill); /* first status notification */ timeout_remove(&process->to_status); @@ -600,6 +634,7 @@ void service_monitor_stop(struct service *service) timeout_remove(&service->to_throttle); timeout_remove(&service->to_prefork); + timeout_remove(&service->to_idle); } void service_monitor_stop_close(struct service *service) diff --git a/src/master/service-process.c b/src/master/service-process.c index 296cc3e3e9..0de515572b 100644 --- a/src/master/service-process.c +++ b/src/master/service-process.c @@ -410,7 +410,7 @@ void service_process_destroy(struct service_process *process) i_assert(service->process_avail <= service->process_count); timeout_remove(&process->to_status); - timeout_remove(&process->to_idle); + timeout_remove(&process->to_idle_kill); if (service->list->log_byes != NULL) service_process_notify_add(service->list->log_byes, process); diff --git a/src/master/service-process.h b/src/master/service-process.h index 797f63db46..f3e47bb245 100644 --- a/src/master/service-process.h +++ b/src/master/service-process.h @@ -26,8 +26,9 @@ struct service_process { This field also determines whether the process is in the service's "busy" or "idle" processes linked list. */ time_t idle_start; - /* kill process if it hits idle timeout */ - struct timeout *to_idle; + /* Timeout for processing idle-kill SIGINT. Either the process will die + or it sends a notification about not being idling anymore */ + struct timeout *to_idle_kill; /* time when we last received a status update */ time_t last_status_update; diff --git a/src/master/service.h b/src/master/service.h index 8234d81af4..8287665d48 100644 --- a/src/master/service.h +++ b/src/master/service.h @@ -113,6 +113,8 @@ struct service { struct timeout *to_drop; /* delayed process_limit reached warning with SERVICE_TYPE_WORKER */ struct timeout *to_drop_warning; + /* next time to try to kill idling processes */ + struct timeout *to_idle; /* prefork processes up to process_min_avail if there's time */ struct timeout *to_prefork;