]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-master: If connect() to backend UNIX socket is retried, log a warning
authorTimo Sirainen <timo.sirainen@dovecot.fi>
Fri, 13 Apr 2018 13:14:18 +0000 (16:14 +0300)
committerTimo Sirainen <timo.sirainen@dovecot.fi>
Fri, 8 Jun 2018 08:42:25 +0000 (11:42 +0300)
For example if imap-login process needs to retry before it successfully
connects to imap process's socket, a warning is logged. This warning is
important because it means that the imap-login process may have been
sleeping up to 0.5 seconds and causing all the other connections to hang
during it.

It would be better to make this retrying asynchronous, but before spending
time on doing that, lets see if this warning is ever even being logged.

src/lib-master/master-auth.c

index 19a60d10c32f142ec7265883cb2b36928404a86f..b8d0a37603ac29217eae35466d375be54464b4ce 100644 (file)
@@ -5,6 +5,7 @@
 #include "fdpass.h"
 #include "buffer.h"
 #include "hash.h"
+#include "time-util.h"
 #include "master-service-private.h"
 #include "master-auth.h"
 
@@ -12,6 +13,7 @@
 #include <sys/stat.h>
 
 #define SOCKET_CONNECT_RETRY_MSECS 500
+#define SOCKET_CONNECT_RETRY_WARNING_INTERVAL_SECS 2
 #define MASTER_AUTH_REQUEST_TIMEOUT_MSECS (MASTER_LOGIN_TIMEOUT_SECS/2*1000)
 
 struct master_auth_connection {
@@ -35,6 +37,7 @@ struct master_auth {
        pool_t pool;
 
        const char *default_path;
+       time_t last_connect_warning;
 
        unsigned int tag_counter;
        HASH_TABLE(void *, struct master_auth_connection *) connections;
@@ -182,8 +185,28 @@ void master_auth_request_full(struct master_auth *auth,
        buffer_append(buf, &req, sizeof(req));
        buffer_append(buf, params->data, req.data_size);
 
-       conn->fd = net_connect_unix_with_retries(conn->path,
-                                                SOCKET_CONNECT_RETRY_MSECS);
+       conn->fd = net_connect_unix(conn->path);
+       if (conn->fd == -1 && errno == EAGAIN) {
+               /* Couldn't connect to the socket immediately. This will add
+                  a delay that causes hangs to the whole process, which won't
+                  be obvious unless we log a warning. FIXME: The wait could
+                  be asynchronous. */
+               struct timeval start_time;
+
+               io_loop_time_refresh();
+               start_time = ioloop_timeval;
+               conn->fd = net_connect_unix_with_retries(conn->path,
+                       SOCKET_CONNECT_RETRY_MSECS);
+               io_loop_time_refresh();
+               if (conn->fd != -1 &&
+                   ioloop_time - auth->last_connect_warning >=
+                   SOCKET_CONNECT_RETRY_WARNING_INTERVAL_SECS) {
+                       i_warning("net_connect_unix(%s) succeeded only after retrying - "
+                                 "took %lld us", conn->path,
+                                 timeval_diff_usecs(&ioloop_timeval, &start_time));
+                       auth->last_connect_warning = ioloop_time;
+               }
+       }
        if (conn->fd == -1) {
                i_error("net_connect_unix(%s) failed: %m%s",
                        conn->path, errno != EAGAIN ? "" :