git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
replicator: If sync fails, retry it in 5 minutes.
author Timo Sirainen <tss@iki.fi>
Sun, 24 Mar 2013 15:21:49 +0000 (17:21 +0200)
committer Timo Sirainen <tss@iki.fi>
Sun, 24 Mar 2013 15:21:49 +0000 (17:21 +0200)
src/replication/replicator/replicator-queue.c
src/replication/replicator/replicator-queue.h
src/replication/replicator/replicator.c

index 790617067eb312a49440423188ef0b33bcc8ba18..c2d58fc6455c0cea96da13cb7a78ac9ebce088de 100644 (file)
@@ -30,6 +30,7 @@ struct replicator_queue {
        ARRAY(struct replicator_sync_lookup) sync_lookups;
 
        unsigned int full_sync_interval;
+       unsigned int failure_resync_interval;
 
        void (*change_callback)(void *context);
        void *change_context;
@@ -50,6 +51,18 @@ static int user_priority_cmp(const void *p1, const void *p2)
                        return -1;
                if (user1->last_fast_sync > user2->last_fast_sync)
                        return 1;
+       } else if (user1->last_sync_failed != user2->last_sync_failed) {
+               /* resync failures first */
+               if (user1->last_sync_failed)
+                       return -1;
+               else
+                       return 1;
+       } else if (user1->last_sync_failed) {
+               /* both have failed. resync failures with fast-sync timestamp */
+               if (user1->last_fast_sync < user2->last_fast_sync)
+                       return -1;
+               if (user1->last_fast_sync > user2->last_fast_sync)
+                       return 1;
        } else {
                /* nothing to replicate, but do still periodic full syncs */
                if (user1->last_full_sync < user2->last_full_sync)
@@ -60,12 +73,15 @@ static int user_priority_cmp(const void *p1, const void *p2)
        return 0;
 }
 
-struct replicator_queue *replicator_queue_init(unsigned int full_sync_interval)
+struct replicator_queue *
+replicator_queue_init(unsigned int full_sync_interval,
+                     unsigned int failure_resync_interval)
 {
        struct replicator_queue *queue;
 
        queue = i_new(struct replicator_queue, 1);
        queue->full_sync_interval = full_sync_interval;
+       queue->failure_resync_interval = failure_resync_interval;
        queue->user_queue = priorityq_init(user_priority_cmp, 1024);
        hash_table_create(&queue->user_hash, default_pool, 1024,
                          str_hash, strcmp);
@@ -182,13 +198,35 @@ void replicator_queue_remove(struct replicator_queue *queue,
                queue->change_callback(queue->change_context);
 }
 
+static bool /* TRUE if the user may be synced right now, FALSE if it has to wait */
+replicator_queue_can_sync_now(struct replicator_queue *queue,
+                             struct replicator_user *user,
+                             unsigned int *next_secs_r) /* written only when FALSE is returned */
+{
+       time_t next_sync; /* earliest time at which this user may be synced */
+
+       if (user->priority != REPLICATION_PRIORITY_NONE) /* any real priority is never delayed */
+               return TRUE;
+
+       if (user->last_sync_failed) { /* previous sync failed: use the retry interval */
+               next_sync = user->last_fast_sync +
+                       queue->failure_resync_interval;
+       } else { /* nothing pending: use the periodic full sync interval */
+               next_sync = user->last_full_sync + queue->full_sync_interval;
+       }
+       if (next_sync <= ioloop_time) /* the wait has already elapsed */
+               return TRUE;
+
+       *next_secs_r = next_sync - ioloop_time; /* time left until next_sync, relative to ioloop_time */
+       return FALSE;
+}
+
 struct replicator_user *
 replicator_queue_pop(struct replicator_queue *queue,
                     unsigned int *next_secs_r)
 {
        struct priorityq_item *item;
        struct replicator_user *user;
-       time_t next_full_sync;
 
        item = priorityq_peek(queue->user_queue);
        if (item == NULL) {
@@ -197,12 +235,8 @@ replicator_queue_pop(struct replicator_queue *queue,
                return NULL;
        }
        user = (struct replicator_user *)item;
-
-       next_full_sync = user->last_full_sync + queue->full_sync_interval;
-       if (user->priority == REPLICATION_PRIORITY_NONE &&
-           next_full_sync > ioloop_time) {
-               /* we don't want to do a full sync yet */
-               *next_secs_r = next_full_sync - ioloop_time;
+       if (!replicator_queue_can_sync_now(queue, user, next_secs_r)) {
+               /* we don't want to sync the user yet */
                return NULL;
        }
        priorityq_remove(queue->user_queue, &user->item);
index 20f9d1b9ed8c98a84a3108dd21041cf5a72d7517..e14df1753395fc2c57e799fe014b4ad7c44186a8 100644 (file)
@@ -12,7 +12,7 @@ struct replicator_user {
        char *state;
        /* last time this user's state was updated */
        time_t last_update;
-       /* last_fast_run is always >= last_full_run. */
+       /* last_fast_sync is always >= last_full_sync. */
        time_t last_fast_sync, last_full_sync;
 
        enum replication_priority priority;
@@ -24,7 +24,9 @@ struct replicator_user {
 
 typedef void replicator_sync_callback_t(bool success, void *context);
 
-struct replicator_queue *replicator_queue_init(unsigned int full_sync_interval);
+struct replicator_queue *
+replicator_queue_init(unsigned int full_sync_interval,
+                     unsigned int failure_resync_interval);
 void replicator_queue_deinit(struct replicator_queue **queue);
 
 /* Call the specified callback when data is added/removed/moved in queue
index 4b53c70d2d0014fb250e20f0c269c7550f972d88..a1d90e24f1238e3e0657cae2a4d0d48d5217bbdf 100644 (file)
@@ -14,6 +14,8 @@
 
 #define REPLICATOR_AUTH_SERVICE_NAME "replicator"
 #define REPLICATOR_DB_DUMP_INTERVAL_MSECS (1000*60*15)
+/* if syncing fails, try again in 5 minutes */
+#define REPLICATOR_FAILURE_RESYNC_INTERVAL_SECS (60*5)
 #define REPLICATOR_DB_FNAME "replicator.db"
 
 static struct replicator_queue *queue;
@@ -79,7 +81,8 @@ static void main_init(void)
        sets = master_service_settings_get_others(master_service);
        set = sets[0];
 
-       queue = replicator_queue_init(set->replication_full_sync_interval);
+       queue = replicator_queue_init(set->replication_full_sync_interval,
+                                     REPLICATOR_FAILURE_RESYNC_INTERVAL_SECS);
        replication_add_users(queue);
        to_dump = timeout_add(REPLICATOR_DB_DUMP_INTERVAL_MSECS,
                              replicator_dump_timeout, (void *)NULL);