]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-master: If connect() to backend UNIX socket is retried, log a warning
authorTimo Sirainen <timo.sirainen@dovecot.fi>
Fri, 13 Apr 2018 13:14:18 +0000 (16:14 +0300)
committerVille Savolainen <ville.savolainen@dovecot.fi>
Fri, 18 May 2018 14:08:08 +0000 (17:08 +0300)
For example if imap-login process needs to retry before it successfully
connects to imap process's socket, a warning is logged. This warning is
important because it means that the imap-login process may have been
sleeping up to 0.5 seconds and causing all the other connections to hang
during it.

It would be better to make this retrying asynchronous, but before spending
time on doing that, lets see if this warning is ever even being logged.

src/lib-master/master-auth.c

index fb773d011a8c66395f3d0fce5f06ecedf1335f18..6aba697ad7022ab043382a40104f05d7655ad38f 100644 (file)
@@ -5,6 +5,7 @@
 #include "fdpass.h"
 #include "buffer.h"
 #include "hash.h"
+#include "time-util.h"
 #include "master-service-private.h"
 #include "master-auth.h"
 
@@ -12,6 +13,7 @@
 #include <sys/stat.h>
 
 #define SOCKET_CONNECT_RETRY_MSECS 500
+#define SOCKET_CONNECT_RETRY_WARNING_INTERVAL_SECS 2
 #define MASTER_AUTH_REQUEST_TIMEOUT_MSECS (MASTER_LOGIN_TIMEOUT_SECS/2*1000)
 
 struct master_auth_connection {
@@ -35,6 +37,7 @@ struct master_auth {
        pool_t pool;
 
        const char *default_path;
+       time_t last_connect_warning;
 
        unsigned int tag_counter;
        HASH_TABLE(void *, struct master_auth_connection *) connections;
@@ -185,8 +188,28 @@ void master_auth_request_full(struct master_auth *auth,
        buffer_append(buf, &req, sizeof(req));
        buffer_append(buf, params->data, req.data_size);
 
-       conn->fd = net_connect_unix_with_retries(conn->path,
-                                                SOCKET_CONNECT_RETRY_MSECS);
+       conn->fd = net_connect_unix(conn->path);
+       if (conn->fd == -1 && errno == EAGAIN) {
+               /* Couldn't connect to the socket immediately. This will add
+                  a delay that causes hangs to the whole process, which won't
+                  be obvious unless we log a warning. FIXME: The wait could
+                  be asynchronous. */
+               struct timeval start_time;
+
+               io_loop_time_refresh();
+               start_time = ioloop_timeval;
+               conn->fd = net_connect_unix_with_retries(conn->path,
+                       SOCKET_CONNECT_RETRY_MSECS);
+               io_loop_time_refresh();
+               if (conn->fd != -1 &&
+                   ioloop_time - auth->last_connect_warning >=
+                   SOCKET_CONNECT_RETRY_WARNING_INTERVAL_SECS) {
+                       i_warning("net_connect_unix(%s) succeeded only after retrying - "
+                                 "took %lld us", conn->path,
+                                 timeval_diff_usecs(&ioloop_timeval, &start_time));
+                       auth->last_connect_warning = ioloop_time;
+               }
+       }
        if (conn->fd == -1) {
                i_error("net_connect_unix(%s) failed: %m%s",
                        conn->path, errno != EAGAIN ? "" :