]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
director: Avoid USER loops when ring latency is over 1 second
authorTimo Sirainen <timo.sirainen@dovecot.fi>
Sat, 25 Nov 2017 23:19:35 +0000 (01:19 +0200)
committerTimo Sirainen <timo.sirainen@dovecot.fi>
Sun, 26 Nov 2017 11:04:31 +0000 (13:04 +0200)
Do this by adding a timestamp parameter to USER events. This way if it
takes over 1 second for the USER event to traverse the ring, it won't get
into an infinite loop getting the user updated over and over again.

src/director/director-connection.c
src/director/director.c
src/director/director.h
src/director/user-directory.h

index 593664099fff58b2f5df8f257e059fca60aa11ab..c05903bd9b427bbc5df2428a11e66f585c22d8c2 100644 (file)
@@ -682,12 +682,25 @@ director_user_refresh(struct director_connection *conn,
                user->host->user_count++;
                ret = TRUE;
        }
-       if (timestamp == ioloop_time && (time_t)user->timestamp != timestamp) {
+       /* Update user's timestamp if it's higher than the current one. Note
+          that we'll preserve the original timestamp. This is important when
+          the director ring is slow and a single USER can traverse through
+          the ring more than a second. We don't want to get into a loop where
+          the same USER goes through the ring forever. */
+       if ((time_t)user->timestamp < timestamp) {
+               /* NOTE: This makes the users list somewhat out-of-order.
+                  It's not a big problem - most likely it's only a few seconds
+                  difference. The worst that can happen is that some users
+                  take up memory that should have been freed already. */
+               dir_debug("user refresh: %u refreshed timestamp from %u to %ld",
+                         username_hash, user->timestamp, (long)timestamp);
                user_directory_refresh(users, user);
+               user->timestamp = timestamp;
                ret = TRUE;
+       } else {
+               dir_debug("user refresh: %u ignored timestamp %ld (we have %u)",
+                         username_hash, (long)timestamp, user->timestamp);
        }
-       dir_debug("user refresh: %u refreshed timeout to %ld",
-                 username_hash, (long)user->timestamp);
 
        if (unset_weak_user) {
                /* user is no longer weak. handle pending requests for
@@ -765,10 +778,12 @@ director_cmd_user(struct director_connection *conn,
        struct mail_host *host;
        struct user *user;
        bool forced;
+       time_t timestamp = ioloop_time;
 
-       if (str_array_length(args) != 2 ||
+       if (str_array_length(args) < 2 ||
            str_to_uint(args[0], &username_hash) < 0 ||
-           net_addr2ip(args[1], &ip) < 0) {
+           net_addr2ip(args[1], &ip) < 0 ||
+           (args[2] != NULL && str_to_time(args[2], &timestamp) < 0)) {
                director_cmd_error(conn, "Invalid parameters");
                return FALSE;
        }
@@ -780,7 +795,7 @@ director_cmd_user(struct director_connection *conn,
        }
 
        if (director_user_refresh(conn, username_hash,
-                                 host, ioloop_time, FALSE, &forced, &user)) {
+                                 host, timestamp, FALSE, &forced, &user)) {
                struct director_host *src_host =
                        forced ? conn->dir->self_host : conn->host;
                i_assert(!user->weak);
index 5faf8ca8052cc432c7d791beb78018930505924c..30749a41d11a0df9fe35bf946693aad17e7c4cc7 100644 (file)
@@ -730,11 +730,24 @@ void director_flush_host(struct director *dir, struct director_host *src,
 void director_update_user(struct director *dir, struct director_host *src,
                          struct user *user)
 {
-       i_assert(src != NULL);
+       struct director_connection *const *connp;
 
+       i_assert(src != NULL);
        i_assert(!user->weak);
-       director_update_send(dir, src, t_strdup_printf("USER\t%u\t%s\n",
-               user->username_hash, user->host->ip_str));
+
+       array_foreach(&dir->connections, connp) {
+               if (director_connection_get_host(*connp) == src)
+                       continue;
+
+               if (director_connection_get_minor_version(*connp) >= DIRECTOR_VERSION_USER_TIMESTAMP) {
+                       director_connection_send(*connp, t_strdup_printf(
+                               "USER\t%u\t%s\t%u\n", user->username_hash, user->host->ip_str,
+                               user->timestamp));
+               } else {
+                       director_connection_send(*connp, t_strdup_printf(
+                               "USER\t%u\t%s\n", user->username_hash, user->host->ip_str));
+               }
+       }
 }
 
 void director_update_user_weak(struct director *dir, struct director_host *src,
index fa7d22d5c90112e3731e4953d5f1bd26ebc96d99..2f7acd4ccb5f1ef83e3ecf10101a463d49acbece 100644 (file)
@@ -28,6 +28,8 @@
 #define DIRECTOR_VERSION_USER_KICK_ALT 8
 /* Users are sent as "U" command in handshake */
 #define DIRECTOR_VERSION_HANDSHAKE_U_CMD 9
+/* USER event with timestamp supported */
+#define DIRECTOR_VERSION_USER_TIMESTAMP 9
 
 /* Minimum time between even attempting to communicate with a director that
    failed due to a protocol error. */
index 690e07ae5ea4caca7d0733cc56f2f3c4f027bb3e..5b46330a664d2637189c2e18c0183ad5f1e425c2 100644 (file)
@@ -5,7 +5,9 @@
        ((user)->kill_ctx != NULL)
 
 struct user {
-       /* sorted by time */
+       /* Approximately sorted by time (except during handshaking).
+          The sorting order may be constantly wrong a few seconds here and
+          there. */
        struct user *prev, *next;
 
        /* first 32 bits of MD5(username). collisions are quite unlikely, but