git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
BUG/MEDIUM: peers: apply a random reconnection timeout
author Willy Tarreau <w@1wt.eu>
Wed, 20 May 2015 08:39:04 +0000 (10:39 +0200)
committer Willy Tarreau <w@1wt.eu>
Wed, 20 May 2015 08:49:07 +0000 (10:49 +0200)
Commit 9ff95bb ("BUG/MEDIUM: peers: correctly configure the client timeout")
uncovered an old bug in the peers: upon disconnect, we reconnect immediately.
This sometimes results in both ends doing the same thing in parallel, causing
a loop of connect/accept/close/close that can last several seconds. The risk
of this happening increases with latency, and is made worse by the
fact that idle connections are now frequently recycled (after 5s of idle).

In order to avoid this we must apply a random delay before reconnecting.
Fortunately the mechanism already supports a reconnect delay, so here we
compute the random timeout when killing a session. The delay is 50ms plus
a random value between 0 and 2 seconds. Ideally an exponential back-off
would be used, but it's preferable to keep the fix simple.

This bug was reported by Marco Corte.

This fix must be backported to 1.5 since the fix above was backported into
1.5.12.

src/peers.c

index 9cc4e0d1fd372b524cb8558af5156e35698ec9cf..468a96dc70e713cdce7a199d10d1e2d08ee7d779 100644 (file)
@@ -1065,6 +1065,7 @@ static struct applet peer_applet = {
 static void peer_session_forceshutdown(struct stream * stream)
 {
        struct appctx *appctx = NULL;
+       struct peer_session *ps;
        int i;
 
        for (i = 0; i <= 1; i++) {
@@ -1079,6 +1080,14 @@ static void peer_session_forceshutdown(struct stream * stream)
        if (!appctx)
                return;
 
+       ps = (struct peer_session *)appctx->ctx.peers.ptr;
+       /* we're killing a connection, we must apply a random delay before
+        * retrying otherwise the other end will do the same and we can loop
+        * for a while.
+        */
+       if (ps)
+               ps->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + random() % 2000));
+
        /* call release to reinit resync states if needed */
        peer_session_release(appctx);
        appctx->st0 = PEER_SESS_ST_END;
@@ -1238,8 +1247,8 @@ static struct task *process_peer_sync(struct task * task)
                                if (!ps->stream) {
                                        /* no active stream */
                                        if (ps->statuscode == 0 ||
-                                           ps->statuscode == PEER_SESS_SC_SUCCESSCODE ||
                                            ((ps->statuscode == PEER_SESS_SC_CONNECTCODE ||
+                                             ps->statuscode == PEER_SESS_SC_SUCCESSCODE ||
                                              ps->statuscode == PEER_SESS_SC_CONNECTEDCODE) &&
                                             tick_is_expired(ps->reconnect, now_ms))) {
                                                /* connection never tried
@@ -1250,8 +1259,7 @@ static struct task *process_peer_sync(struct task * task)
                                                /* retry a connect */
                                                ps->stream = peer_session_create(ps->peer, ps);
                                        }
-                                       else if (ps->statuscode == PEER_SESS_SC_CONNECTCODE ||
-                                                ps->statuscode == PEER_SESS_SC_CONNECTEDCODE) {
+                                       else if (!tick_is_expired(ps->reconnect, now_ms)) {
                                                /* If previous session failed during connection
                                                 * but reconnection timer is not expired */