]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
director: Moved all user killing state to struct director_kill_context
authorTimo Sirainen <timo.sirainen@dovecot.fi>
Sun, 16 Oct 2016 22:07:50 +0000 (01:07 +0300)
committerTimo Sirainen <timo.sirainen@dovecot.fi>
Wed, 26 Oct 2016 10:10:37 +0000 (13:10 +0300)
This should make it a bit easier to understand the life time of user
killing. It also simplifies code by removing struct
director_user_kill_finish_ctx.

Finally, this already reduces memory usage with 32bit systems, and would
make it possible to reduce also on 64bit systems if timestamp is shrank to
31 bits and weak bit moved after it. I'm not sure if that would be better
for performance though. In any case it would provide free space for 4 extra
bytes if that were needed in future.

src/director/director-connection.c
src/director/director.c
src/director/director.h
src/director/test-user-directory.c
src/director/user-directory.c
src/director/user-directory.h

index 0df8ed010c334501f5593f6f949957f3ef0bd03c..b3c84e5becd4f800e20e9b9d2bc0342fcd6a014d 100644 (file)
@@ -572,11 +572,11 @@ director_user_refresh(struct director_connection *conn,
                        str_printfa(str, ",handshaking,recv_ts=%ld",
                                    (long)timestamp);
                }
-               if (user->to_move != NULL)
-                       str_append(str, ",moving");
                if (USER_IS_BEING_KILLED(user)) {
+                       if (user->kill_ctx->to_move != NULL)
+                               str_append(str, ",moving");
                        str_printfa(str, ",kill_state=%s",
-                                   user_kill_state_names[user->kill_state]);
+                                   user_kill_state_names[user->kill_ctx->kill_state]);
                }
                str_append_c(str, ')');
                i_error("%s", str_c(str));
index 37be3295949f4e39dfd824d8708083458c6d8d56..dc798d5649cc7b6fce7c29a04e307808b67172b7 100644 (file)
@@ -710,35 +710,15 @@ void director_update_user_weak(struct director *dir, struct director_host *src,
        }
 }
 
-struct director_user_kill_finish_ctx {
-       struct director *dir;
-       unsigned int username_hash;
-       struct ip_addr host_ip;
-       struct user *user;
-       struct program_client *pclient;
-       struct ostream *reply;
-       char *socket_path;
-};
-
 static void
-director_flush_user_continue(int result,
-                            struct director_user_kill_finish_ctx *ctx)
+director_flush_user_continue(int result, struct director_kill_context *ctx)
 {
+       struct director *dir = ctx->dir;
        struct user *user =
-               user_directory_lookup(ctx->dir->users, ctx->username_hash);
+               user_directory_lookup(dir->users, ctx->username_hash);
+
+       ctx->callback_pending = FALSE;
 
-       if (user == NULL) {
-               dir_debug("User %u freed while flushing, result=%d",
-                         ctx->username_hash, result);
-       } else if (user->kill_state != USER_KILL_STATE_FLUSHING) {
-               dir_debug("User %u move state changed while flushing, result=%d",
-                         ctx->username_hash, result);
-       } else {
-               dir_debug("Flushing user %u finished, result=%d",
-                         ctx->username_hash, result);
-               director_user_kill_finish_delayed(ctx->dir, user,
-                                                 result == 1);
-       }
        if (result == 0) {
                struct istream *is = iostream_temp_finish(&ctx->reply, (size_t)-1);
                char *data;
@@ -761,13 +741,25 @@ director_flush_user_continue(int result,
                o_stream_unref(&ctx->reply);
        }
        program_client_destroy(&ctx->pclient);
-       i_free(ctx->socket_path);
-       i_free(ctx);
+
+       if (!DIRECTOR_KILL_CONTEXT_IS_VALID(user, ctx)) {
+               /* user was already freed - ignore */
+               dir_debug("User %u freed while flushing, result=%d",
+                         ctx->username_hash, result);
+               i_assert(ctx->to_move == NULL);
+               i_free(ctx);
+       } else {
+               /* ctx is freed later via user->kill_ctx */
+               dir_debug("Flushing user %u finished, result=%d",
+                         ctx->username_hash, result);
+               director_user_kill_finish_delayed(dir, user, result == 1);
+       }
 }
 
 static void
 director_flush_user(struct director *dir, struct user *user)
 {
+       struct director_kill_context *ctx = user->kill_ctx;
        struct var_expand_table tab[] = {
                { 'i', net_ip2addr(&user->host->ip), "ip" },
                { 'h', user->host->hostname, "host" },
@@ -779,16 +771,12 @@ director_flush_user(struct director *dir, struct user *user)
           would be redundant since they're all supposed to be performing the
           same flush task to the same backend. */
        if (*dir->set->director_flush_socket == '\0' ||
-           !user->kill_is_self_initiated) {
+           !ctx->kill_is_self_initiated) {
                director_user_kill_finish_delayed(dir, user, FALSE);
                return;
        }
 
-       struct director_user_kill_finish_ctx *ctx =
-               i_new(struct director_user_kill_finish_ctx, 1);
-       ctx->username_hash = user->username_hash;
        ctx->host_ip = user->host->ip;
-       ctx->dir = dir;
 
        string_t *s_sock = str_new(default_pool, 32);
        var_expand(s_sock, dir->set->director_flush_socket, tab);
@@ -804,7 +792,7 @@ director_flush_user(struct director *dir, struct user *user)
        const char *const args[] = {"FLUSH",
                t_strdup_printf("%u", user->username_hash), NULL};
 
-       user->kill_state = USER_KILL_STATE_FLUSHING;
+       ctx->kill_state = USER_KILL_STATE_FLUSHING;
        dir_debug("Flushing user %u via %s", user->username_hash,
                  ctx->socket_path);
 
@@ -825,19 +813,25 @@ director_flush_user(struct director *dir, struct user *user)
                                                           net_ip2addr(&user->host->ip)));
        o_stream_set_no_error_handling(ctx->reply, TRUE);
        program_client_set_output(ctx->pclient, ctx->reply);
+       ctx->callback_pending = TRUE;
        program_client_run_async(ctx->pclient, director_flush_user_continue, ctx);
 }
 
-static void director_user_move_free(struct director *dir, struct user *user)
+static void director_user_move_free(struct user *user)
 {
-       i_assert(user->to_move != NULL);
+       struct director *dir = user->kill_ctx->dir;
+       struct director_kill_context *kill_ctx = user->kill_ctx;
+
+       i_assert(kill_ctx != NULL);
 
        dir_debug("User %u move finished at state=%s", user->username_hash,
-                 user_kill_state_names[user->kill_state]);
+                 user_kill_state_names[kill_ctx->kill_state]);
 
-       user->kill_is_self_initiated = FALSE;
-       user->kill_state = USER_KILL_STATE_NONE;
-       timeout_remove(&user->to_move);
+       if (kill_ctx->to_move != NULL)
+               timeout_remove(&kill_ctx->to_move);
+       i_free(kill_ctx->socket_path);
+       i_free(kill_ctx);
+       user->kill_ctx = NULL;
 
        i_assert(dir->users_moving_count > 0);
        dir->users_moving_count--;
@@ -846,62 +840,55 @@ static void director_user_move_free(struct director *dir, struct user *user)
 }
 
 static void
-director_user_kill_finish_delayed_to(struct director_user_kill_finish_ctx *ctx)
+director_user_kill_finish_delayed_to(struct user *user)
 {
-       i_assert(ctx->user->kill_state == USER_KILL_STATE_DELAY);
+       i_assert(user->kill_ctx != NULL);
+       i_assert(user->kill_ctx->kill_state == USER_KILL_STATE_DELAY);
 
-       director_user_move_free(ctx->dir, ctx->user);
-       i_free(ctx);
+       director_user_move_free(user);
 }
 
 static void
 director_user_kill_finish_delayed(struct director *dir, struct user *user,
                                  bool skip_delay)
 {
-       struct director_user_kill_finish_ctx *ctx;
-
        if (skip_delay) {
-               director_user_move_free(dir, user);
+               user->kill_ctx->kill_state = USER_KILL_STATE_NONE;
+               director_user_move_free(user);
                return;
        }
 
-       ctx = i_new(struct director_user_kill_finish_ctx, 1);
-       ctx->dir = dir;
-       ctx->user = user;
-
-       user->kill_state = USER_KILL_STATE_DELAY;
+       user->kill_ctx->kill_state = USER_KILL_STATE_DELAY;
 
        /* wait for a while for the kills to finish in the backend server,
           so there are no longer any processes running for the user before we
           start letting new in connections to the new server. */
-       timeout_remove(&user->to_move);
-       user->to_move = timeout_add(dir->set->director_user_kick_delay * 1000,
-                                   director_user_kill_finish_delayed_to, ctx);
+       timeout_remove(&user->kill_ctx->to_move);
+       user->kill_ctx->to_move =
+               timeout_add(dir->set->director_user_kick_delay * 1000,
+                           director_user_kill_finish_delayed_to, user);
 }
 
-struct director_kill_context {
-       struct director *dir;
-       unsigned int username_hash;
-       bool kill_is_self_initiated;
-};
-
 static void
 director_finish_user_kill(struct director *dir, struct user *user, bool self)
 {
-       i_assert(user->kill_state != USER_KILL_STATE_FLUSHING);
-       i_assert(user->kill_state != USER_KILL_STATE_DELAY);
+       struct director_kill_context *kill_ctx = user->kill_ctx;
+
+       i_assert(kill_ctx != NULL);
+       i_assert(kill_ctx->kill_state != USER_KILL_STATE_FLUSHING);
+       i_assert(kill_ctx->kill_state != USER_KILL_STATE_DELAY);
 
        if (dir->right == NULL) {
                /* we're alone */
                director_flush_user(dir, user);
        } else if (self ||
-                  user->kill_state == USER_KILL_STATE_KILLING_NOTIFY_RECEIVED) {
+                  kill_ctx->kill_state == USER_KILL_STATE_KILLING_NOTIFY_RECEIVED) {
                director_connection_send(dir->right, t_strdup_printf(
                        "USER-KILLED\t%u\n", user->username_hash));
-               user->kill_state = USER_KILL_STATE_KILLED_WAITING_FOR_EVERYONE;
+               kill_ctx->kill_state = USER_KILL_STATE_KILLED_WAITING_FOR_EVERYONE;
        } else {
-               i_assert(user->kill_state == USER_KILL_STATE_KILLING);
-               user->kill_state = USER_KILL_STATE_KILLED_WAITING_FOR_NOTIFY;
+               i_assert(kill_ctx->kill_state == USER_KILL_STATE_KILLING);
+               kill_ctx->kill_state = USER_KILL_STATE_KILLED_WAITING_FOR_NOTIFY;
        }
 }
 
@@ -935,17 +922,19 @@ static void director_kill_user_callback(enum ipc_client_cmd_state state,
                break;
        }
 
+       ctx->callback_pending = FALSE;
+
        user = user_directory_lookup(ctx->dir->users, ctx->username_hash);
-       if (user == NULL) {
+       if (!DIRECTOR_KILL_CONTEXT_IS_VALID(user, ctx)) {
                /* user was already freed - ignore */
-       } else if (user->kill_state == USER_KILL_STATE_KILLING ||
-                  user->kill_state == USER_KILL_STATE_KILLING_NOTIFY_RECEIVED) {
+               i_assert(ctx->to_move == NULL);
+               i_free(ctx);
+       } else {
+               i_assert(ctx->kill_state == USER_KILL_STATE_KILLING ||
+                        ctx->kill_state == USER_KILL_STATE_KILLING_NOTIFY_RECEIVED);
                /* we were still waiting for the kill notification */
                director_finish_user_kill(ctx->dir, user, ctx->kill_is_self_initiated);
-       } else {
-               /* we don't currently want to kill the user */
        }
-       i_free(ctx);
 }
 
 static void director_user_move_throttled(unsigned int new_events_count,
@@ -957,19 +946,17 @@ static void director_user_move_throttled(unsigned int new_events_count,
 
 static void director_user_move_timeout(struct user *user)
 {
-       i_assert(user->kill_state != USER_KILL_STATE_FLUSHING);
-       i_assert(user->kill_state != USER_KILL_STATE_DELAY);
+       i_assert(user->kill_ctx != NULL);
+       i_assert(user->kill_ctx->kill_state != USER_KILL_STATE_FLUSHING);
+       i_assert(user->kill_ctx->kill_state != USER_KILL_STATE_DELAY);
 
        if (log_throttle_accept(user_move_throttle)) {
                i_error("Finishing user %u move timed out, "
                        "its state may now be inconsistent (state=%s)",
                        user->username_hash,
-                       user_kill_state_names[user->kill_state]);
+                       user_kill_state_names[user->kill_ctx->kill_state]);
        }
-
-       /* FIXME: shouldn't use global director, but for now there's no easy
-          way to get access to it otherwise */
-       director_user_move_free(director, user);
+       director_user_move_free(user);
 }
 
 void director_move_user(struct director *dir, struct director_host *src,
@@ -1011,18 +998,19 @@ void director_move_user(struct director *dir, struct director_host *src,
                user->timestamp = ioloop_time;
        }
        if (!USER_IS_BEING_KILLED(user)) {
-               ctx = i_new(struct director_kill_context, 1);
+               user->kill_ctx = ctx = i_new(struct director_kill_context, 1);
                ctx->dir = dir;
                ctx->username_hash = username_hash;
                ctx->kill_is_self_initiated = src->self;
 
                dir->users_moving_count++;
-               user->to_move = timeout_add(DIRECTOR_USER_MOVE_TIMEOUT_MSECS,
-                                           director_user_move_timeout, user);
-               user->kill_is_self_initiated = src->self;
-               user->kill_state = USER_KILL_STATE_KILLING;
+               ctx->to_move = timeout_add(DIRECTOR_USER_MOVE_TIMEOUT_MSECS,
+                                          director_user_move_timeout, user);
+               ctx->kill_state = USER_KILL_STATE_KILLING;
+
                cmd = t_strdup_printf("proxy\t*\tKICK-DIRECTOR-HASH\t%u",
                                      username_hash);
+               ctx->callback_pending = TRUE;
                ipc_client_cmd(dir->ipc_proxy, cmd,
                               director_kill_user_callback, ctx);
        } else {
@@ -1030,7 +1018,7 @@ void director_move_user(struct director *dir, struct director_host *src,
                   finished. We'll just continue wherever we left off
                   earlier. */
                dir_debug("User %u move restarted - previous kill_state=%s",
-                         username_hash, user_kill_state_names[user->kill_state]);
+                         username_hash, user_kill_state_names[user->kill_ctx->kill_state]);
        }
 
        if (orig_src == NULL) {
@@ -1140,19 +1128,19 @@ void director_user_killed(struct director *dir, unsigned int username_hash)
        struct user *user;
 
        user = user_directory_lookup(dir->users, username_hash);
-       if (user == NULL)
+       if (user == NULL || !USER_IS_BEING_KILLED(user))
                return;
 
-       switch (user->kill_state) {
+       switch (user->kill_ctx->kill_state) {
        case USER_KILL_STATE_KILLING:
-               user->kill_state = USER_KILL_STATE_KILLING_NOTIFY_RECEIVED;
+               user->kill_ctx->kill_state = USER_KILL_STATE_KILLING_NOTIFY_RECEIVED;
                break;
        case USER_KILL_STATE_KILLED_WAITING_FOR_NOTIFY:
                director_finish_user_kill(dir, user, TRUE);
                break;
        case USER_KILL_STATE_KILLING_NOTIFY_RECEIVED:
                dir_debug("User %u kill_state=%s - ignoring USER-KILLED",
-                         username_hash, user_kill_state_names[user->kill_state]);
+                         username_hash, user_kill_state_names[user->kill_ctx->kill_state]);
                break;
        case USER_KILL_STATE_NONE:
        case USER_KILL_STATE_FLUSHING:
@@ -1183,9 +1171,14 @@ void director_user_killed_everywhere(struct director *dir,
                          username_hash);
                return;
        }
-       if (user->kill_state != USER_KILL_STATE_KILLED_WAITING_FOR_EVERYONE) {
+       if (!USER_IS_BEING_KILLED(user)) {
+               dir_debug("User %u is no longer being killed - ignoring USER-KILLED-EVERYWHERE",
+                         username_hash);
+               return;
+       }
+       if (user->kill_ctx->kill_state != USER_KILL_STATE_KILLED_WAITING_FOR_EVERYONE) {
                dir_debug("User %u kill_state=%s - ignoring USER-KILLED-EVERYWHERE",
-                         username_hash, user_kill_state_names[user->kill_state]);
+                         username_hash, user_kill_state_names[user->kill_ctx->kill_state]);
                return;
        }
 
@@ -1230,6 +1223,22 @@ void director_update_send_version(struct director *dir,
        }
 }
 
+static void director_user_freed(struct user *user)
+{
+       if (user->kill_ctx != NULL) {
+               /* director_user_expire is very short. user expired before
+                  moving the user finished or timed out. */
+               if (user->kill_ctx->callback_pending) {
+                       /* kill_ctx is used as a callback parameter.
+                          only remove the timeout and finish the free later. */
+                       if (user->kill_ctx->to_move != NULL)
+                               timeout_remove(&user->kill_ctx->to_move);
+               } else {
+                       director_user_move_free(user);
+               }
+       }
+}
+
 struct director *
 director_init(const struct director_settings *set,
              const struct ip_addr *listen_ip, in_port_t listen_port,
@@ -1246,7 +1255,8 @@ director_init(const struct director_settings *set,
        i_array_init(&dir->pending_requests, 16);
        i_array_init(&dir->connections, 8);
        dir->users = user_directory_init(set->director_user_expire,
-                                        set->director_username_hash);
+                                        set->director_username_hash,
+                                        director_user_freed);
        dir->mail_hosts = mail_hosts_init(set->director_consistent_hashing);
 
        dir->ipc_proxy = ipc_client_init(DIRECTOR_IPC_PROXY_PATH);
index 28bfad4b4eefa2b6cd3b2683cadab4c86933aad7..35e34fb08132508b836c8b2cece6de183d7361f4 100644 (file)
@@ -63,6 +63,31 @@ extern const char *user_kill_state_names[USER_KILL_STATE_DELAY+1];
 
 typedef void director_state_change_callback_t(struct director *dir);
 
+/* When a user gets freed, the kill_ctx may still be left alive. It's also
+   possible for the user to come back, in which case the kill_ctx is usually
+   NULL, but another kill could have also started. The previous kill_ctx is
+   valid only if it matches the current user's kill_ctx. */
+#define DIRECTOR_KILL_CONTEXT_IS_VALID(user, ctx) \
+       ((user) != NULL && (user)->kill_ctx == ctx)
+
+struct director_kill_context {
+       struct director *dir;
+       unsigned int username_hash;
+       bool kill_is_self_initiated;
+       bool callback_pending;
+
+       enum user_kill_state kill_state;
+       /* Move timeout to make sure user's connections won't silently hang
+          indefinitely if there is some trouble moving it. */
+       struct timeout *to_move;
+
+       /* these are set only for director_flush_socket handling: */
+       struct ip_addr host_ip;
+       struct program_client *pclient;
+       struct ostream *reply;
+       char *socket_path;
+};
+
 struct director {
        const struct director_settings *set;
 
index 87a7d49afb2bc0ba51e032fafce71205b1c80e07..a392da369fcde6b7d7a1c257a8e61217b4694a8c 100644 (file)
@@ -42,7 +42,7 @@ static void test_user_directory_ascending(void)
        unsigned int i;
 
        test_begin("user directory ascending");
-       dir = user_directory_init(USER_DIR_TIMEOUT, "%u");
+       dir = user_directory_init(USER_DIR_TIMEOUT, "%u", NULL);
        (void)user_directory_add(dir, 1, host, ioloop_time + count+1);
 
        for (i = 0; i < count; i++)
@@ -60,7 +60,7 @@ static void test_user_directory_descending(void)
        unsigned int i;
 
        test_begin("user directory descending");
-       dir = user_directory_init(USER_DIR_TIMEOUT, "%u");
+       dir = user_directory_init(USER_DIR_TIMEOUT, "%u", NULL);
 
        for (i = 0; i < count; i++)
                (void)user_directory_add(dir, i+1, host, ioloop_time - i);
@@ -77,7 +77,7 @@ static void test_user_directory_random(void)
        unsigned int i, count = 10000 + rand()%10000;
 
        test_begin("user directory random");
-       dir = user_directory_init(USER_DIR_TIMEOUT, "%u");
+       dir = user_directory_init(USER_DIR_TIMEOUT, "%u", NULL);
        for (i = 0; i < count; i++) {
                if (rand() % 10 == 0)
                        timestamp = ioloop_time;
index 3c60259a9ca017eeefbfa324e6e136b3cb04e3c2..1e57e5784cc4ec87522b00895c57d38b2cc3c706 100644 (file)
@@ -28,6 +28,7 @@ struct user_directory {
        struct user *prev_insert_pos;
 
        ARRAY(struct user_directory_iter *) iters;
+       void (*user_free_hook)(struct user *);
 
        char *username_hash_fmt;
        unsigned int timeout_secs;
@@ -54,11 +55,8 @@ static void user_free(struct user_directory *dir, struct user *user)
        i_assert(user->host->user_count > 0);
        user->host->user_count--;
 
-       if (user->to_move != NULL) {
-               /* director_user_expire is very short. user expired before
-                  moving the user finished or timed out. */
-               timeout_remove(&user->to_move);
-       }
+       if (dir->user_free_hook != NULL)
+               dir->user_free_hook(user);
        user_move_iters(dir, user);
 
        hash_table_remove(dir->hash, POINTER_CAST(user->username_hash));
@@ -281,7 +279,8 @@ bool user_directory_user_is_near_expiring(struct user_directory *dir,
 }
 
 struct user_directory *
-user_directory_init(unsigned int timeout_secs, const char *username_hash_fmt)
+user_directory_init(unsigned int timeout_secs, const char *username_hash_fmt,
+                   void (*user_free_hook)(struct user *))
 {
        struct user_directory *dir;
 
@@ -298,6 +297,7 @@ user_directory_init(unsigned int timeout_secs, const char *username_hash_fmt)
        i_assert(dir->timeout_secs/2 > dir->user_near_expiring_secs);
 
        dir->username_hash_fmt = i_strdup(username_hash_fmt);
+       dir->user_free_hook = user_free_hook;
        hash_table_create_direct(&dir->hash, default_pool, 0);
        i_array_init(&dir->iters, 8);
        return dir;
index f4d323cb706fdb890b1c4d157eb706b0a496f656..e5c9b265694747a7a11abaf2e260fedd69d4d3ac 100644 (file)
@@ -1,10 +1,8 @@
 #ifndef USER_DIRECTORY_H
 #define USER_DIRECTORY_H
 
-#include "director.h"
-
 #define USER_IS_BEING_KILLED(user) \
-       ((user)->kill_state != USER_KILL_STATE_NONE)
+       ((user)->kill_ctx != NULL)
 
 struct user {
        /* sorted by time */
@@ -18,25 +16,21 @@ struct user {
 
        struct mail_host *host;
 
-       /* Move timeout to make sure user's connections won't silently hang
-          indefinitely if there is some trouble moving it. */
-       struct timeout *to_move;
-       /* If not USER_KILL_STATE_NONE, don't allow new connections until all
+       /* If non-NULL, don't allow new connections until all
           directors have killed the user's connections. */
-       enum user_kill_state kill_state;
+       struct director_kill_context *kill_ctx;
 
        /* TRUE, if the user's timestamp was close to being expired and we're
           now doing a ring-wide sync for this user to make sure we don't
           assign conflicting hosts to it */
        unsigned int weak:1;
-       /* TRUE, if this server initiated the user's kill. */
-       unsigned int kill_is_self_initiated:1;
 };
 
 /* Create a new directory. Users are dropped if their time gets older
    than timeout_secs. */
 struct user_directory *
-user_directory_init(unsigned int timeout_secs, const char *username_hash_fmt);
+user_directory_init(unsigned int timeout_secs, const char *username_hash_fmt,
+                   void (*user_free_hook)(struct user *));
 void user_directory_deinit(struct user_directory **dir);
 
 /* Returns the number of users currently in directory. */