]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
master: Replace per-process idle_kill timeout with per-service
authorTimo Sirainen <timo.sirainen@open-xchange.com>
Thu, 23 Mar 2023 14:07:26 +0000 (16:07 +0200)
committerTimo Sirainen <timo.sirainen@open-xchange.com>
Thu, 11 May 2023 09:15:05 +0000 (12:15 +0300)
This is much more efficient, since it doesn't have to keep updating the
timeout constantly for busy processes.

This change still preserves the original way the idle_kill setting behaves.

src/master/service-monitor.c
src/master/service-process.c
src/master/service-process.h
src/master/service.h

index b035d742b6abd2390074425a081dfb1923f8f9e2..ddfea6085119d4afeae69fa022e39246f7b0c3aa 100644 (file)
@@ -34,30 +34,67 @@ static void service_status_more(struct service_process *process,
                                const struct master_status *status);
 static void service_monitor_listen_start_force(struct service *service);
 
-static void service_process_kill_idle(struct service_process *process)
+static void service_process_idle_kill_timeout(struct service_process *process)
 {
-       struct service *service = process->service;
        struct master_status status;
 
-       i_assert(process->available_count == service->client_limit);
+       service_error(process->service, "Process %s is ignoring idle SIGINT",
+                     dec2str(process->pid));
+
+       /* assume this process is busy */
+       i_zero(&status);
+       service_status_more(process, &status);
+       process->available_count = 0;
+}
 
+static void service_kill_idle(struct service *service)
+{
+       /* Kill processes which have idled over idle_kill seconds.
+
+          (It's actually not important which processes get killed. A better
+          way could be to kill the oldest processes since they might have to
+          be restarted anyway soon due to reaching service_count, but we'd
+          have to use priority queue for tracking that, which is more
+          expensive and probably not worth it.) */
+       if (service->idle_processes_head == NULL) {
+               /* no idling processes to kill */
+               timeout_remove(&service->to_idle);
+               return;
+       }
        if (service->process_avail <= service->set->process_min_avail) {
                /* we don't have any extra idling processes anymore. */
-               timeout_remove(&process->to_idle);
-       } else if (process->last_kill_sent > process->last_status_update+1) {
-               service_error(service, "Process %s is ignoring idle SIGINT",
-                             dec2str(process->pid));
-
-               /* assume this process is busy */
-               i_zero(&status);
-               service_status_more(process, &status);
-               process->available_count = 0;
-       } else {
+               timeout_remove(&service->to_idle);
+               return;
+       }
+       time_t newest_time_to_kill = ioloop_time - service->idle_kill;
+       unsigned int max_processes_to_kill =
+               service->process_avail - service->set->process_min_avail;
+       for (; max_processes_to_kill > 0; max_processes_to_kill--) {
+               struct service_process *process = service->idle_processes_head;
+
+               if (process->to_idle_kill != NULL) {
+                       /* already tried to kill all the idle processes */
+                       break;
+               }
+               if (process->idle_start > newest_time_to_kill)
+                       break;
+
+               i_assert(process->available_count == service->client_limit);
                if (kill(process->pid, SIGINT) < 0 && errno != ESRCH) {
                        service_error(service, "kill(%s, SIGINT) failed: %m",
                                      dec2str(process->pid));
                }
                process->last_kill_sent = ioloop_time;
+               process->to_idle_kill =
+                       timeout_add(service->idle_kill * 1000,
+                                   service_process_idle_kill_timeout, process);
+
+               /* Move it to the end of the list, so it's not tried to be
+                  killed again. */
+               DLLIST2_REMOVE(&service->idle_processes_head,
+                              &service->idle_processes_tail, process);
+               DLLIST2_APPEND(&service->idle_processes_head,
+                              &service->idle_processes_tail, process);
        }
 }
 
@@ -79,8 +116,6 @@ static void service_status_more(struct service_process *process,
        process->total_count +=
                process->available_count - status->available_count;
 
-       timeout_remove(&process->to_idle);
-
        if (status->available_count != 0)
                return;
 
@@ -120,16 +155,13 @@ static void service_check_idle(struct service_process *process)
        process->idle_start = ioloop_time;
 
        if (service->process_avail > service->set->process_min_avail &&
-           process->to_idle == NULL &&
+           service->to_idle == NULL &&
            service->idle_kill != UINT_MAX) {
-               /* we have more processes than we really need.
-                  add a bit of randomness so that we don't send the
-                  signal to all of them at once */
-               process->to_idle =
-                       timeout_add((service->idle_kill * 1000) +
-                                   i_rand_limit(100) * 10,
-                                   service_process_kill_idle,
-                                   process);
+               /* We have more processes than we really need. Start a timer
+                  to trigger idle_kill. */
+               service->to_idle =
+                       timeout_add(service->idle_kill * 1000,
+                                   service_kill_idle, service);
        }
 }
 
@@ -177,6 +209,8 @@ service_status_input_one(struct service *service,
                return;
        }
        process->last_status_update = ioloop_time;
+       /* process worked a bit - it may have ignored idle-kill signal */
+       timeout_remove(&process->to_idle_kill);
 
        /* first status notification */
        timeout_remove(&process->to_status);
@@ -600,6 +634,7 @@ void service_monitor_stop(struct service *service)
 
        timeout_remove(&service->to_throttle);
        timeout_remove(&service->to_prefork);
+       timeout_remove(&service->to_idle);
 }
 
 void service_monitor_stop_close(struct service *service)
index 296cc3e3e95422c4e8a20583ec5dc84e2830e668..0de515572b339f1a38038447493a81e427a237bb 100644 (file)
@@ -410,7 +410,7 @@ void service_process_destroy(struct service_process *process)
        i_assert(service->process_avail <= service->process_count);
 
        timeout_remove(&process->to_status);
-       timeout_remove(&process->to_idle);
+       timeout_remove(&process->to_idle_kill);
        if (service->list->log_byes != NULL)
                service_process_notify_add(service->list->log_byes, process);
 
index 797f63db4619c859c9fccf540a29e3f47ec22f10..f3e47bb245947f48f1d3a96e99f68aa958a0d08d 100644 (file)
@@ -26,8 +26,9 @@ struct service_process {
           This field also determines whether the process is in the service's
           "busy" or "idle" processes linked list. */
        time_t idle_start;
-       /* kill process if it hits idle timeout */
-       struct timeout *to_idle;
+       /* Timeout for processing idle-kill SIGINT. Either the process will die
+          or it sends a notification about not being idling anymore */
+       struct timeout *to_idle_kill;
 
        /* time when we last received a status update */
        time_t last_status_update;
index 8234d81af447e05d73f55d3012140b916a7e92ba..8287665d48f42de66b91df9fb7bab3824734fd42 100644 (file)
@@ -113,6 +113,8 @@ struct service {
        struct timeout *to_drop;
        /* delayed process_limit reached warning with SERVICE_TYPE_WORKER */
        struct timeout *to_drop_warning;
+       /* next time to try to kill idling processes */
+       struct timeout *to_idle;
 
        /* prefork processes up to process_min_avail if there's time */
        struct timeout *to_prefork;