]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
master: Improve killing processes when they don't die after reload
authorTimo Sirainen <timo.sirainen@dovecot.fi>
Thu, 24 Aug 2017 13:24:21 +0000 (16:24 +0300)
committerAki Tuomi <aki.tuomi@dovecot.fi>
Fri, 25 Aug 2017 12:21:10 +0000 (15:21 +0300)
The old behavior was:
1. Send SIGTERM to all processes, except log
2. Send SIGTERM to all processes, including log
3. Send SIGKILL to all processes, including log

The new behavior is now:
1. Send SIGTERM to all processes, except log
2. Send SIGKILL to all processes, except log

Only after there aren't any processes left except log, send SIGTERM to it.
If that doesn't work, send SIGKILL.

src/master/main.c
src/master/service.c
src/master/service.h

index 493ddd796a740e65c688f7e713be7abdddb26527..0693d5be75b1b88444e70f9f8cb1a50cdced4cc5 100644 (file)
@@ -380,7 +380,8 @@ sig_settings_reload(const siginfo_t *si ATTR_UNUSED,
 static void
 sig_log_reopen(const siginfo_t *si ATTR_UNUSED, void *context ATTR_UNUSED)
 {
-        service_signal(services->log, SIGUSR1);
+       unsigned int uninitialized_count;
+       service_signal(services->log, SIGUSR1, &uninitialized_count);
 
        master_service_init_log(master_service, "master: ");
        i_set_fatal_handler(master_fatal_callback);
index 4d2b460c080822b2f0e9b6103769bcda537af982..cb75033b1558db78f0cbdb6877a6271534a65f97 100644 (file)
@@ -498,17 +498,20 @@ int services_create(const struct master_settings *set,
        return 0;
 }
 
-void service_signal(struct service *service, int signo)
+unsigned int service_signal(struct service *service, int signo,
+                           unsigned int *uninitialized_count_r)
 {
        struct service_process *process = service->processes;
        unsigned int count = 0;
 
+       *uninitialized_count_r = 0;
        for (; process != NULL; process = process->next) {
                i_assert(process->service == service);
 
                if (!SERVICE_PROCESS_IS_INITIALIZED(process) &&
                    signo != SIGKILL) {
                        /* too early to signal it */
+                       *uninitialized_count_r += 1;
                        continue;
                }
                    
@@ -524,15 +527,18 @@ void service_signal(struct service *service, int signo)
                          signo == SIGTERM ? "SIGTERM" : "SIGKILL",
                          count, service->set->name);
        }
+       return count;
 }
 
 static void service_login_notify_send(struct service *service)
 {
+       unsigned int uninitialized_count;
+
        service->last_login_notify_time = ioloop_time;
        if (service->to_login_notify != NULL)
                timeout_remove(&service->to_login_notify);
 
-       service_signal(service, SIGUSR1);
+       service_signal(service, SIGUSR1, &uninitialized_count);
 }
 
 static void service_login_notify_timeout(struct service *service)
@@ -574,14 +580,14 @@ void service_login_notify(struct service *service, bool all_processes_full)
 static void services_kill_timeout(struct service_list *service_list)
 {
        struct service *const *services, *log_service;
-       bool sigterm_log;
+       unsigned int service_uninitialized, uninitialized_count = 0;
+       unsigned int signal_count = 0;
        int sig;
 
-       if (!service_list->sigterm_sent || !service_list->sigterm_sent_to_log)
+       if (!service_list->sigterm_sent)
                sig = SIGTERM;
        else
                sig = SIGKILL;
-       sigterm_log = service_list->sigterm_sent;
        service_list->sigterm_sent = TRUE;
 
        i_warning("Processes aren't dying after reload, sending %s.",
@@ -593,14 +599,25 @@ static void services_kill_timeout(struct service_list *service_list)
 
                if (service->type == SERVICE_TYPE_LOG)
                        log_service = service;
-               else
-                       service_signal(service, sig);
+               else {
+                       signal_count += service_signal(service, sig,
+                                                      &service_uninitialized);
+                       uninitialized_count += service_uninitialized;
+               }
        }
-       /* kill log service later so it could still have a chance of logging
-          something */
-       if (log_service != NULL && sigterm_log) {
-               service_signal(log_service, sig);
+       if (log_service == NULL) {
+               /* log service doesn't exist - shouldn't really happen */
+       } else if (signal_count > 0 || uninitialized_count > 0) {
+               /* kill log service later so the last remaining processes
+                  can still have a chance of logging something */
+       } else {
+               if (!service_list->sigterm_sent_to_log)
+                       sig = SIGTERM;
+               else
+                       sig = SIGKILL;
                service_list->sigterm_sent_to_log = TRUE;
+               signal_count += service_signal(log_service, sig, &service_uninitialized);
+               uninitialized_count += service_uninitialized;
        }
 }
 
index c2f3ee65f0d4cbbab2b875840ae4d51d725501ec..0b56fedbdf708f8fed80de66f5bcde380d670be4 100644 (file)
@@ -171,8 +171,13 @@ void service_list_unref(struct service_list *service_list);
 /* Return path to configuration process socket. */
 const char *services_get_config_socket_path(struct service_list *service_list);
 
-/* Send a signal to all processes in a given service */
-void service_signal(struct service *service, int signo);
+/* Send a signal to all processes in a given service. However, if we're sending
+   a SIGTERM and a process hasn't yet sent the initial status notification,
+   that process is skipped. The number of such skipped processes are stored in
+   uninitialized_count_r. Returns the number of processes that a signal was
+   successfully sent to. */
+unsigned int service_signal(struct service *service, int signo,
+                           unsigned int *uninitialized_count_r);
 /* Notify all processes (if necessary) that no more connections can be handled
    by the service without killing existing connections (TRUE) or that they
    can be (FALSE). */