git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
master: Assume service is successful if its process has existed for 10 seconds
author: Timo Sirainen <timo.sirainen@open-xchange.com>
Thu, 14 Apr 2022 16:12:04 +0000 (18:12 +0200)
committer: aki.tuomi <aki.tuomi@open-xchange.com>
Wed, 4 May 2022 05:44:00 +0000 (05:44 +0000)
The master code has a special check to start throttling services that are
rapidly dying and have never had any successful exits. However, this exit
check works poorly for services whose processes are supposed to never
exit. So if a service has a process that has existed for at least 10 seconds
and has had its initial status notification sent, handle it the same as if
the service had already once exited successfully.

src/master/service-monitor.c
src/master/service-process.c
src/master/service-process.h

index 1fdc93a379a3ffda27efd2f5d6c138722136a96f..99512f18d53c8e8de0910d5eb6e490fe0233875c 100644 (file)
@@ -26,6 +26,7 @@
 #define SERVICE_LOG_DROP_WARNING_DELAY_MSECS 500
 #define MAX_DIE_WAIT_MSECS 5000
 #define SERVICE_MAX_EXIT_FAILURES_IN_SEC 10
+#define SERVICE_MIN_SUCCESSFUL_AGE_SECS 10
 #define SERVICE_PREFORK_MAX_AT_ONCE 10
 
 static void service_monitor_start_extra_avail(struct service *service);
@@ -685,6 +686,28 @@ void services_monitor_stop(struct service_list *service_list, bool wait)
        services_log_deinit(service_list);
 }
 
+static bool service_has_successful_processes(struct service *service)
+{
+       if (service->have_successful_exits)
+               return TRUE;
+
+       /* No success has been recorded for this service yet (checked
+          above). See if there is a process that has existed for a while
+          (at least SERVICE_MIN_SUCCESSFUL_AGE_SECS) and has
+          received the initial status notification (to_status == NULL).
+          Such a process makes this function return TRUE just like a
+          recorded successful exit does. The oldest processes are
+          last in the list, so just scan through all of them. */
+       struct service_process *process = service->processes;
+       for (; process != NULL; process = process->next) {
+               time_t age_secs = ioloop_time - process->create_time;
+               if (age_secs >= SERVICE_MIN_SUCCESSFUL_AGE_SECS &&
+                   process->to_status == NULL) {
+                       /* Remember this so this list doesn't have to be
+                          scanned again. Note that have_successful_exits
+                          gets set here even though no process has
+                          actually exited. */
+                       service->have_successful_exits = TRUE;
+                       return TRUE;
+               }
+       }
+       return FALSE;
+}
+
 static bool
 service_process_failure(struct service_process *process, int status)
 {
@@ -693,12 +716,14 @@ service_process_failure(struct service_process *process, int status)
 
        service_process_log_status_error(process, status);
        throttle = process->to_status != NULL;
-       if (!throttle && !service->have_successful_exits) {
-               /* this service has seen no successful exits yet.
-                  try to avoid failure storms by throttling the service if it
-                  only keeps failing rapidly. this is no longer done after
-                  one success to avoid intentional DoSing, in case attacker
-                  finds a way to quickly crash his own session. */
+       if (!throttle && !service_has_successful_processes(service)) {
+               /* This service has seen no successful exits yet and no
+                  processes that were already running for a while.
+                  Try to avoid failure storms at Dovecot startup by throttling
+                  the service if it only keeps failing rapidly. This is no
+                  longer done after the service looks to be generally working,
+                  in case an attacker finds a way to quickly crash their own
+                  session. */
                if (service->exit_failure_last != ioloop_time) {
                        service->exit_failure_last = ioloop_time;
                        service->exit_failures_in_sec = 0;
index a9a9e8ee37feaa03b71243fc18f1205ad48baca1..44ccbfd00248a0caaba9504bd46a03b3910b3828 100644 (file)
@@ -407,6 +407,7 @@ struct service_process *service_process_create(struct service *service)
        process->refcount = 1;
        process->pid = pid;
        process->uid = uid;
+       process->create_time = ioloop_time;
        if (process_forked) {
                process->to_status =
                        timeout_add(SERVICE_FIRST_STATUS_TIMEOUT_SECS * 1000,
index 3d55a685c81e9ef9810d9647e66ba7f2179c3b81..2efea0f348dd685bef573d7b91fb5f912be901d6 100644 (file)
@@ -19,6 +19,8 @@ struct service_process {
           smaller than the correct value. */
        unsigned int total_count;
 
+       /* Timestamp when the process was created */
+       time_t create_time;
        /* time when process started idling, or 0 if we're not idling */
        time_t idle_start;
        /* kill process if it hits idle timeout */