]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MINOR: peers: handle multiple resync requests using shards
authorEmeric Brun <ebrun@haproxy.com>
Mon, 24 Oct 2022 08:04:59 +0000 (10:04 +0200)
committerWilly Tarreau <w@1wt.eu>
Mon, 24 Oct 2022 08:55:53 +0000 (10:55 +0200)
We considered the resync process finished once a full resync request
ended with the reception of a "resync-finish" message. But in the case
of "shards", each node declared with a "shard" has only a partial view
of the table, so the resync process was ended even though the original
peer's tables contain only a "shard" of the full content.

This patch allows retrieving the entire tables by requesting a resync
from all the different "shards".

To do so, we don't commit the end of a resync process when receiving a
"resync-finish" if the node is part of a "shard"; we only flag this
peer and all peers using the same shard as "notup2date", as if we had
received a "resync-partial" message, and we re-schedule a resync
request as is done when receiving a "resync-partial" message.

Doing this, the peers flagged "notup2date" won't be addressed for the
next resync request round and the next resync request will be sent to
a shard not yet requested.

When receiving a "resync-finish" message, we also check if all peers using
"shards" are flagged "notup2date". It means that all peers have been
addressed and we can consider that the resync process is now finished.

Note also that the "resync request" scheduler already handles a timeout:
if we are not able to retrieve a full resync after a delay, the
resync process is ended.

This patch should be backported to all versions handling "shard"
on peer lines.

src/peers.c

index 8e6c8b000408858747de522c8fe9c86584247b7f..4434fa3bded7b41e792b60bf9aea30ebfd496508 100644 (file)
@@ -2454,13 +2454,47 @@ static inline int peer_treat_awaited_msg(struct appctx *appctx, struct peer *pee
                        TRACE_PROTO("received control message", PEERS_EV_CTRLMSG,
                                    NULL, &msg_head[1], peers->local->id, peer->id);
                        if (peer->flags & PEER_F_LEARN_ASSIGN) {
+                               int commit_a_finish = 1;
+
                                peer->flags &= ~PEER_F_LEARN_ASSIGN;
                                peers->flags &= ~(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS);
-                               peers->flags |= (PEERS_F_RESYNC_LOCAL|PEERS_F_RESYNC_REMOTE);
-                               if (peer->local)
-                                       peers->flags |= PEERS_F_RESYNC_LOCALFINISHED;
-                               else
-                                       peers->flags |= PEERS_F_RESYNC_REMOTEFINISHED;
+                               if (peer->srv->shard) {
+                                       struct peer *ps;
+
+                                       peers->flags |= PEERS_F_RESYNC_REMOTEPARTIAL;
+                                       peer->flags |= PEER_F_LEARN_NOTUP2DATE;
+                                       for (ps = peers->remote; ps; ps = ps->next) {
+                                               if (ps->srv->shard == peer->srv->shard) {
+                                                       /* flag all peers from same shard
+                                                        * notup2date to disable request
+                                                        * of a resync frm them
+                                                        */
+                                                       ps->flags |= PEER_F_LEARN_NOTUP2DATE;
+                                               }
+                                               else if (ps->srv->shard && !(ps->flags & PEER_F_LEARN_NOTUP2DATE)) {
+                                                       /* it remains some other shards not requested
+                                                        * we don't commit a resync finish to request
+                                                        * the other shards
+                                                        */
+                                                       commit_a_finish = 0;
+                                               }
+                                       }
+
+                                       if (!commit_a_finish) {
+                                               /* it remains some shard to request, we schedule a new request
+                                                */
+                                               peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT));
+                                               task_wakeup(peers->sync_task, TASK_WOKEN_MSG);
+                                       }
+                               }
+
+                               if (commit_a_finish) {
+                                       peers->flags |= (PEERS_F_RESYNC_LOCAL|PEERS_F_RESYNC_REMOTE);
+                                       if (peer->local)
+                                               peers->flags |= PEERS_F_RESYNC_LOCALFINISHED;
+                                       else
+                                               peers->flags |= PEERS_F_RESYNC_REMOTEFINISHED;
+                               }
                        }
                        peer->confirm++;
                }