]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
BUG/MINOR: checks: Handle synchronous connect when a tcpcheck is started
authorChristopher Faulet <cfaulet@haproxy.com>
Thu, 6 May 2021 14:01:18 +0000 (16:01 +0200)
committerChristopher Faulet <cfaulet@haproxy.com>
Fri, 7 May 2021 10:00:56 +0000 (12:00 +0200)
A connection may be synchronously established. In the tcpcheck context, it
may be a problem if several connections come one after another. In this
case, there is no event to close the very first connection before starting
the next one. The checks is thus blocked and timed out, a L7 timeout error
is reported.

To fix the bug, when a tcpcheck is started, we immediately evaluate its
state. Most of time, nothing is performed and we must wait. But it is thus
possible to handle the result of a successfull connection.

This patch should fix the issue #1234. It must be backported as far as 2.2.

src/check.c

index 026f1c5b60765641fb160677c25636464c047762..1ad587cd1dff21922fe16aea820d104a39ddc4e3 100644 (file)
@@ -1092,8 +1092,8 @@ struct task *process_chk_conn(struct task *t, void *context, unsigned int state)
 {
        struct check *check = context;
        struct proxy *proxy = check->proxy;
-       struct conn_stream *cs = check->cs;
-       struct connection *conn = cs_conn(cs);
+       struct conn_stream *cs;
+       struct connection *conn;
        int rv;
        int expired = tick_is_expired(t->expire, now_ms);
 
@@ -1101,6 +1101,7 @@ struct task *process_chk_conn(struct task *t, void *context, unsigned int state)
 
        if (check->server)
                HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
+
        if (!(check->state & CHK_ST_INPROGRESS)) {
                /* no check currently running */
                if (!expired) /* woke up too early */ {
@@ -1128,101 +1129,103 @@ struct task *process_chk_conn(struct task *t, void *context, unsigned int state)
 
                check->current_step = NULL;
                tcpcheck_main(check);
-               goto out_unlock;
+               expired = 0;
        }
-       else {
-               /* there was a test running.
-                * First, let's check whether there was an uncaught error,
-                * which can happen on connect timeout or error.
+
+       cs = check->cs;
+       conn = cs_conn(cs);
+
+       /* there was a test running.
+        * First, let's check whether there was an uncaught error,
+        * which can happen on connect timeout or error.
+        */
+       if (check->result == CHK_RES_UNKNOWN) {
+               /* Here the connection must be defined. Otherwise the
+                * error would have already been detected
                 */
-               if (check->result == CHK_RES_UNKNOWN) {
-                       /* Here the connection must be defined. Otherwise the
-                        * error would have already been detected
-                        */
-                       if ((conn->flags & CO_FL_ERROR) || cs->flags & CS_FL_ERROR || expired) {
-                               TRACE_ERROR("report connection error", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
-                               chk_report_conn_err(check, 0, expired);
+               if ((conn && ((conn->flags & CO_FL_ERROR) || (cs->flags & CS_FL_ERROR))) || expired) {
+                       TRACE_ERROR("report connection error", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+                       chk_report_conn_err(check, 0, expired);
+               }
+               else {
+                       if (check->state & CHK_ST_CLOSE_CONN) {
+                               TRACE_DEVEL("closing current connection", CHK_EV_TASK_WAKE|CHK_EV_HCHK_RUN, check);
+                               cs_destroy(cs);
+                               cs = NULL;
+                               conn = NULL;
+                               check->cs = NULL;
+                               check->state &= ~CHK_ST_CLOSE_CONN;
+                               tcpcheck_main(check);
                        }
-                       else {
-                               if (check->state & CHK_ST_CLOSE_CONN) {
-                                       TRACE_DEVEL("closing current connection", CHK_EV_TASK_WAKE|CHK_EV_HCHK_RUN, check);
-                                       cs_destroy(cs);
-                                       cs = NULL;
-                                       conn = NULL;
-                                       check->cs = NULL;
-                                       check->state &= ~CHK_ST_CLOSE_CONN;
-                                       tcpcheck_main(check);
-                               }
-                               if (check->result == CHK_RES_UNKNOWN) {
-                                       TRACE_DEVEL("health-check not expired", CHK_EV_TASK_WAKE|CHK_EV_HCHK_RUN, check);
-                                       goto out_unlock; /* timeout not reached, wait again */
-                               }
+                       if (check->result == CHK_RES_UNKNOWN) {
+                               TRACE_DEVEL("health-check not expired", CHK_EV_TASK_WAKE|CHK_EV_HCHK_RUN, check);
+                               goto out_unlock; /* timeout not reached, wait again */
                        }
                }
+       }
 
-               /* check complete or aborted */
-               TRACE_STATE("health-check complete or aborted", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END, check);
+       /* check complete or aborted */
+       TRACE_STATE("health-check complete or aborted", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END, check);
 
-               check->current_step = NULL;
+       check->current_step = NULL;
 
-               if (conn && conn->xprt) {
-                       /* The check was aborted and the connection was not yet closed.
-                        * This can happen upon timeout, or when an external event such
-                        * as a failed response coupled with "observe layer7" caused the
-                        * server state to be suddenly changed.
-                        */
-                       cs_drain_and_close(cs);
-               }
+       if (conn && conn->xprt) {
+               /* The check was aborted and the connection was not yet closed.
+                * This can happen upon timeout, or when an external event such
+                * as a failed response coupled with "observe layer7" caused the
+                * server state to be suddenly changed.
+                */
+               cs_drain_and_close(cs);
+       }
 
-               if (cs) {
-                       if (check->wait_list.events)
-                               cs->conn->mux->unsubscribe(cs, check->wait_list.events, &check->wait_list);
-                       /* We may have been scheduled to run, and the
-                        * I/O handler expects to have a cs, so remove
-                        * the tasklet
-                        */
-                       tasklet_remove_from_tasklet_list(check->wait_list.tasklet);
-                       cs_destroy(cs);
-                       cs = check->cs = NULL;
-                       conn = NULL;
-               }
+       if (cs) {
+               if (check->wait_list.events)
+                       cs->conn->mux->unsubscribe(cs, check->wait_list.events, &check->wait_list);
+               /* We may have been scheduled to run, and the
+                * I/O handler expects to have a cs, so remove
+                * the tasklet
+                */
+               tasklet_remove_from_tasklet_list(check->wait_list.tasklet);
+               cs_destroy(cs);
+               cs = check->cs = NULL;
+               conn = NULL;
+       }
 
-               if (check->sess != NULL) {
-                       vars_prune(&check->vars, check->sess, NULL);
-                       session_free(check->sess);
-                       check->sess = NULL;
-               }
+       if (check->sess != NULL) {
+               vars_prune(&check->vars, check->sess, NULL);
+               session_free(check->sess);
+               check->sess = NULL;
+       }
 
-               if (check->server) {
-                       if (check->result == CHK_RES_FAILED) {
-                               /* a failure or timeout detected */
-                               TRACE_DEVEL("report failure", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
-                               check_notify_failure(check);
-                       }
-                       else if (check->result == CHK_RES_CONDPASS) {
-                               /* check is OK but asks for stopping mode */
-                               TRACE_DEVEL("report conditional success", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
-                               check_notify_stopping(check);
-                       }
-                       else if (check->result == CHK_RES_PASSED) {
-                               /* a success was detected */
-                               TRACE_DEVEL("report success", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
-                               check_notify_success(check);
-                       }
+       if (check->server) {
+               if (check->result == CHK_RES_FAILED) {
+                       /* a failure or timeout detected */
+                       TRACE_DEVEL("report failure", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+                       check_notify_failure(check);
                }
-               task_set_affinity(t, MAX_THREADS_MASK);
-               check_release_buf(check, &check->bi);
-               check_release_buf(check, &check->bo);
-               check->state &= ~(CHK_ST_INPROGRESS|CHK_ST_IN_ALLOC|CHK_ST_OUT_ALLOC);
-
-               if (check->server) {
-                       rv = 0;
-                       if (global.spread_checks > 0) {
-                               rv = srv_getinter(check) * global.spread_checks / 100;
-                               rv -= (int) (2 * rv * (ha_random32() / 4294967295.0));
-                       }
-                       t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
+               else if (check->result == CHK_RES_CONDPASS) {
+                       /* check is OK but asks for stopping mode */
+                       TRACE_DEVEL("report conditional success", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
+                       check_notify_stopping(check);
+               }
+               else if (check->result == CHK_RES_PASSED) {
+                       /* a success was detected */
+                       TRACE_DEVEL("report success", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
+                       check_notify_success(check);
+               }
+       }
+       task_set_affinity(t, MAX_THREADS_MASK);
+       check_release_buf(check, &check->bi);
+       check_release_buf(check, &check->bo);
+       check->state &= ~(CHK_ST_INPROGRESS|CHK_ST_IN_ALLOC|CHK_ST_OUT_ALLOC);
+
+       if (check->server) {
+               rv = 0;
+               if (global.spread_checks > 0) {
+                       rv = srv_getinter(check) * global.spread_checks / 100;
+                       rv -= (int) (2 * rv * (ha_random32() / 4294967295.0));
                }
+               t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
        }
 
  reschedule: