From 19a82b94953ae06839b34df2fc1e57847bef7c50 Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Fri, 26 Aug 2022 18:40:46 +0200 Subject: [PATCH] BUG/MEDIUM: peers: Don't use resync timer when local resync is in progress When a worker is stopped, the resync timer is used to limit the duration of the connection stage to the new worker to perform the local resync. However, this timer must be stopped when the resync is in progress and it must be re-armed if the resync is interrupted (for instance because of another reload). Otherwise, if the resync is a bit long, an old worker may be killed too early. This bug was introduced by the commit 160fff665 ("BUG/MEDIUM: peers: limit reconnect attempts of the old process on reload"). It must be backported as far as 2.0. --- src/peers.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/peers.c b/src/peers.c index 1305505451..39a5382d35 100644 --- a/src/peers.c +++ b/src/peers.c @@ -3467,6 +3467,10 @@ struct task *process_peer_sync(struct task * task, void *context, unsigned int s } } else if (!ps->appctx) { + /* Re-arm resync timeout if necessary */ + if (!tick_isset(peers->resync_timeout)) + peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + /* If there's no active peer connection */ if (!tick_is_expired(peers->resync_timeout, now_ms) && (ps->statuscode == 0 || @@ -3502,6 +3506,9 @@ struct task *process_peer_sync(struct task * task, void *context, unsigned int s } } else if (ps->statuscode == PEER_SESS_SC_SUCCESSCODE ) { + /* Reset resync timeout during a resync */ + peers->resync_timeout = TICK_ETERNITY; + /* current peer connection is active and established * wake up all peer handlers to push remaining local updates */ for (st = ps->tables; st ; st = st->next) { -- 2.47.3