From: Timo Sirainen Date: Mon, 24 Oct 2016 21:25:38 +0000 (+0300) Subject: director: Fix user move hangs when another move is triggered early. X-Git-Tag: 2.2.26~20 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1976d64251d1544ea1410adf426cb3447b81c993;p=thirdparty%2Fdovecot%2Fcore.git director: Fix user move hangs when another move is triggered early. Running "doveadm director flush" before the previous flush had fully finished could have caused users to hang until their move timeout: director: Error: Finishing user 3249070169 move timed out, its state may now be inconsistent (state=waiting-for-everyone) --- diff --git a/src/director/director-connection.c b/src/director/director-connection.c index 9aa5b15463..c79e4eb70a 100644 --- a/src/director/director-connection.c +++ b/src/director/director-connection.c @@ -549,8 +549,7 @@ director_user_refresh(struct director_connection *conn, "replacing host %s with %s", username_hash, net_ip2addr(&user->host->ip), net_ip2addr(&host->ip)); ret = TRUE; - } else if (user->kill_state != USER_KILL_STATE_NONE && - user->kill_state < USER_KILL_STATE_DELAY) { + } else if (user->kill_state != USER_KILL_STATE_NONE) { /* user is still being moved - ignore conflicting host updates from other directors who don't yet know about the move. 
*/ dir_debug("user refresh: %u is being moved, " diff --git a/src/director/director.c b/src/director/director.c index 9242979383..1c93c1d765 100644 --- a/src/director/director.c +++ b/src/director/director.c @@ -1092,6 +1092,22 @@ void director_kick_user_hash(struct director *dir, struct director_host *src, director_update_send_version(dir, src, DIRECTOR_VERSION_USER_KICK, cmd); } +static void +director_send_user_killed_everywhere(struct director *dir, + struct director_host *src, + struct director_host *orig_src, + unsigned int username_hash) +{ + if (orig_src == NULL) { + orig_src = dir->self_host; + orig_src->last_seq++; + } + director_update_send(dir, src, t_strdup_printf( + "USER-KILLED-EVERYWHERE\t%s\t%u\t%u\t%u\n", + net_ip2addr(&orig_src->ip), orig_src->port, orig_src->last_seq, + username_hash)); +} + void director_user_killed(struct director *dir, unsigned int username_hash) { struct user *user; @@ -1107,10 +1123,18 @@ void director_user_killed(struct director *dir, unsigned int username_hash) case USER_KILL_STATE_KILLED_WAITING_FOR_NOTIFY: director_finish_user_kill(dir, user, TRUE); break; + case USER_KILL_STATE_KILLING_NOTIFY_RECEIVED: + dir_debug("User %u kill_state=%s - ignoring USER-KILLED", + username_hash, user_kill_state_names[user->kill_state]); + break; case USER_KILL_STATE_NONE: case USER_KILL_STATE_FLUSHING: case USER_KILL_STATE_DELAY: - case USER_KILL_STATE_KILLING_NOTIFY_RECEIVED: + /* move restarted. state=none can also happen if USER-MOVE was + sent while we were still moving. send back + USER-KILLED-EVERYWHERE to avoid hangs. 
*/ + director_send_user_killed_everywhere(dir, dir->self_host, NULL, + username_hash); break; case USER_KILL_STATE_KILLED_WAITING_FOR_EVERYONE: director_user_killed_everywhere(dir, dir->self_host, @@ -1132,15 +1156,7 @@ void director_user_killed_everywhere(struct director *dir, return; director_flush_user(dir, user); - - if (orig_src == NULL) { - orig_src = dir->self_host; - orig_src->last_seq++; - } - director_update_send(dir, src, t_strdup_printf( - "USER-KILLED-EVERYWHERE\t%s\t%u\t%u\t%u\n", - net_ip2addr(&orig_src->ip), orig_src->port, orig_src->last_seq, - user->username_hash)); + director_send_user_killed_everywhere(dir, src, orig_src, username_hash); } static void director_state_callback_timeout(struct director *dir)