git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
director: Fix user move hangs when another move is triggered early.
author: Timo Sirainen <timo.sirainen@dovecot.fi>
Mon, 24 Oct 2016 21:25:38 +0000 (00:25 +0300)
committer: Timo Sirainen <timo.sirainen@dovecot.fi>
Tue, 25 Oct 2016 18:01:23 +0000 (21:01 +0300)
Running "doveadm director flush" before the previous flush had fully
finished could have caused users to hang until their move timeout:

director: Error: Finishing user 3249070169 move timed out, its state may now be inconsistent (state=waiting-for-everyone)

src/director/director-connection.c
src/director/director.c

index 9aa5b1546374933657f77aba4ea67f8404f87fc3..c79e4eb70a7dcd3cdabd21f8a77f52944b5707c9 100644 (file)
@@ -549,8 +549,7 @@ director_user_refresh(struct director_connection *conn,
                          "replacing host %s with %s", username_hash,
                          net_ip2addr(&user->host->ip), net_ip2addr(&host->ip));
                ret = TRUE;
-       } else if (user->kill_state != USER_KILL_STATE_NONE &&
-                  user->kill_state < USER_KILL_STATE_DELAY) {
+       } else if (user->kill_state != USER_KILL_STATE_NONE) {
                /* user is still being moved - ignore conflicting host updates
                   from other directors who don't yet know about the move. */
                dir_debug("user refresh: %u is being moved, "
index 92429793834e6605457447e5a577c18a55b585d8..1c93c1d7656400643094bbaee51fe82365c3a0b8 100644 (file)
@@ -1092,6 +1092,22 @@ void director_kick_user_hash(struct director *dir, struct director_host *src,
        director_update_send_version(dir, src, DIRECTOR_VERSION_USER_KICK, cmd);
 }
 
+static void
+director_send_user_killed_everywhere(struct director *dir,
+                                    struct director_host *src,
+                                    struct director_host *orig_src,
+                                    unsigned int username_hash)
+{
+       if (orig_src == NULL) {
+               orig_src = dir->self_host;
+               orig_src->last_seq++;
+       }
+       director_update_send(dir, src, t_strdup_printf(
+               "USER-KILLED-EVERYWHERE\t%s\t%u\t%u\t%u\n",
+               net_ip2addr(&orig_src->ip), orig_src->port, orig_src->last_seq,
+               username_hash));
+}
+
 void director_user_killed(struct director *dir, unsigned int username_hash)
 {
        struct user *user;
@@ -1107,10 +1123,18 @@ void director_user_killed(struct director *dir, unsigned int username_hash)
        case USER_KILL_STATE_KILLED_WAITING_FOR_NOTIFY:
                director_finish_user_kill(dir, user, TRUE);
                break;
+       case USER_KILL_STATE_KILLING_NOTIFY_RECEIVED:
+               dir_debug("User %u kill_state=%s - ignoring USER-KILLED",
+                         username_hash, user_kill_state_names[user->kill_state]);
+               break;
        case USER_KILL_STATE_NONE:
        case USER_KILL_STATE_FLUSHING:
        case USER_KILL_STATE_DELAY:
-       case USER_KILL_STATE_KILLING_NOTIFY_RECEIVED:
+               /* move restarted. state=none can also happen if USER-MOVE was
+                  sent while we were still moving. send back
+                  USER-KILLED-EVERYWHERE to avoid hangs. */
+               director_send_user_killed_everywhere(dir, dir->self_host, NULL,
+                                                    username_hash);
                break;
        case USER_KILL_STATE_KILLED_WAITING_FOR_EVERYONE:
                director_user_killed_everywhere(dir, dir->self_host,
@@ -1132,15 +1156,7 @@ void director_user_killed_everywhere(struct director *dir,
                return;
 
        director_flush_user(dir, user);
-
-       if (orig_src == NULL) {
-               orig_src = dir->self_host;
-               orig_src->last_seq++;
-       }
-       director_update_send(dir, src, t_strdup_printf(
-               "USER-KILLED-EVERYWHERE\t%s\t%u\t%u\t%u\n",
-               net_ip2addr(&orig_src->ip), orig_src->port, orig_src->last_seq,
-               user->username_hash));
+       director_send_user_killed_everywhere(dir, src, orig_src, username_hash);
 }
 
 static void director_state_callback_timeout(struct director *dir)