From f3e77a037a7ceef76d0d45432fb58c003dd61b32 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Sun, 24 Mar 2013 17:21:49 +0200 Subject: [PATCH] replicator: If sync fails, retry it in 5 minutes. --- src/replication/replicator/replicator-queue.c | 50 ++++++++++++++++--- src/replication/replicator/replicator-queue.h | 6 ++- src/replication/replicator/replicator.c | 5 +- 3 files changed, 50 insertions(+), 11 deletions(-) diff --git a/src/replication/replicator/replicator-queue.c b/src/replication/replicator/replicator-queue.c index 790617067e..c2d58fc645 100644 --- a/src/replication/replicator/replicator-queue.c +++ b/src/replication/replicator/replicator-queue.c @@ -30,6 +30,7 @@ struct replicator_queue { ARRAY(struct replicator_sync_lookup) sync_lookups; unsigned int full_sync_interval; + unsigned int failure_resync_interval; void (*change_callback)(void *context); void *change_context; @@ -50,6 +51,18 @@ static int user_priority_cmp(const void *p1, const void *p2) return -1; if (user1->last_fast_sync > user2->last_fast_sync) return 1; + } else if (user1->last_sync_failed != user2->last_sync_failed) { + /* resync failures first */ + if (user1->last_sync_failed) + return -1; + else + return 1; + } else if (user1->last_sync_failed) { + /* both have failed. resync failures with fast-sync timestamp */ + if (user1->last_fast_sync < user2->last_fast_sync) + return -1; + if (user1->last_fast_sync > user2->last_fast_sync) + return 1; } else { /* nothing to replicate, but do still periodic full syncs */ if (user1->last_full_sync < user2->last_full_sync) @@ -60,12 +73,15 @@ static int user_priority_cmp(const void *p1, const void *p2) return 0; } -struct replicator_queue *replicator_queue_init(unsigned int full_sync_interval) +struct replicator_queue * +replicator_queue_init(unsigned int full_sync_interval, + unsigned int failure_resync_interval) { struct replicator_queue *queue; queue = i_new(struct replicator_queue, 1); queue->full_sync_interval = full_sync_interval; + queue->failure_resync_interval = failure_resync_interval; queue->user_queue = priorityq_init(user_priority_cmp, 1024); hash_table_create(&queue->user_hash, default_pool, 1024, str_hash, strcmp); @@ -182,13 +198,35 @@ void replicator_queue_remove(struct replicator_queue *queue, queue->change_callback(queue->change_context); } +static bool +replicator_queue_can_sync_now(struct replicator_queue *queue, + struct replicator_user *user, + unsigned int *next_secs_r) +{ + time_t next_sync; + + if (user->priority != REPLICATION_PRIORITY_NONE) + return TRUE; + + if (user->last_sync_failed) { + next_sync = user->last_fast_sync + + queue->failure_resync_interval; + } else { + next_sync = user->last_full_sync + queue->full_sync_interval; + } + if (next_sync <= ioloop_time) + return TRUE; + + *next_secs_r = next_sync - ioloop_time; + return FALSE; +} + struct replicator_user * replicator_queue_pop(struct replicator_queue *queue, unsigned int *next_secs_r) { struct priorityq_item *item; struct replicator_user *user; - time_t next_full_sync; item = priorityq_peek(queue->user_queue); if (item == NULL) { @@ -197,12 +235,8 @@ replicator_queue_pop(struct replicator_queue *queue, return NULL; } user = (struct replicator_user *)item; - - next_full_sync = user->last_full_sync + queue->full_sync_interval; - if (user->priority == REPLICATION_PRIORITY_NONE && - next_full_sync > ioloop_time) { - /* we don't want to do a full sync yet */ - *next_secs_r = next_full_sync - ioloop_time; + if (!replicator_queue_can_sync_now(queue, user, next_secs_r)) { + /* we don't want to sync the user yet */ return NULL; } priorityq_remove(queue->user_queue, &user->item); diff --git a/src/replication/replicator/replicator-queue.h b/src/replication/replicator/replicator-queue.h index 20f9d1b9ed..e14df17533 100644 --- a/src/replication/replicator/replicator-queue.h +++ b/src/replication/replicator/replicator-queue.h @@ -12,7 +12,7 @@ struct replicator_user { char *state; /* last time this user's state was updated */ time_t last_update; - /* last_fast_run is always >= last_full_run. */ + /* last_fast_sync is always >= last_full_sync. */ time_t last_fast_sync, last_full_sync; enum replication_priority priority; @@ -24,7 +24,9 @@ struct replicator_user { typedef void replicator_sync_callback_t(bool success, void *context); -struct replicator_queue *replicator_queue_init(unsigned int full_sync_interval); +struct replicator_queue * +replicator_queue_init(unsigned int full_sync_interval, + unsigned int failure_resync_interval); void replicator_queue_deinit(struct replicator_queue **queue); /* Call the specified callback when data is added/removed/moved in queue diff --git a/src/replication/replicator/replicator.c b/src/replication/replicator/replicator.c index 4b53c70d2d..a1d90e24f1 100644 --- a/src/replication/replicator/replicator.c +++ b/src/replication/replicator/replicator.c @@ -14,6 +14,8 @@ #define REPLICATOR_AUTH_SERVICE_NAME "replicator" #define REPLICATOR_DB_DUMP_INTERVAL_MSECS (1000*60*15) +/* if syncing fails, try again in 5 minutes */ +#define REPLICATOR_FAILURE_RESYNC_INTERVAL_SECS (60*5) #define REPLICATOR_DB_FNAME "replicator.db" static struct replicator_queue *queue; @@ -79,7 +81,8 @@ static void main_init(void) sets = master_service_settings_get_others(master_service); set = sets[0]; - queue = replicator_queue_init(set->replication_full_sync_interval); + queue = replicator_queue_init(set->replication_full_sync_interval, + REPLICATOR_FAILURE_RESYNC_INTERVAL_SECS); replication_add_users(queue); to_dump = timeout_add(REPLICATOR_DB_DUMP_INTERVAL_MSECS, replicator_dump_timeout, (void *)NULL); -- 2.47.3