]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
BUG/MEDIUM: connections: permit to permanently remove an idle conn
authorWilly Tarreau <w@1wt.eu>
Wed, 5 Nov 2025 09:51:27 +0000 (10:51 +0100)
committerWilly Tarreau <w@1wt.eu>
Wed, 5 Nov 2025 10:08:25 +0000 (11:08 +0100)
There's currently a function conn_delete_from_tree() which is used to
detach an idle connection from the tree it's currently attached to so
that it is no longer found. This function is used in three circumstances:
  - when picking a new connection that no longer has any avail stream
  - when temporarily working on the connection from an I/O handler,
    in which case it's re-added at the end
  - when killing a connection

The 2nd case above is quite specific, as it requires to preserve the
CO_FL_LIST_MASK flags so that the connection can be re-inserted into
the proper tree when leaving the handler. However, there's a catch.
When killing a connection, we want to be certain it will not be
reinserted into the tree. The flags preservation is causing a tiny
race if an I/O happens while the connection is in the kill list,
because in this case the I/O handler will note the connection flags,
do its work, then reinsert the connection where it believed it was,
then the connection gets purged, and another user can find it in the
tree.

The issue is very difficult to reproduce. On a 128-thread machine it
happens in H2 around 500k req/s after around 50M requests. In H1 it
happens after around 1 billion requests.

The fix here consists in passing an extra argument to the function to
indicate if the removal is permanent or not. When it's permanent, the
function will clear the associated flags. The callers were adjusted
so that all those dequeuing a connection in order to kill it do it
permanently and all other ones do it only temporarily.

A slightly different approach could have worked: the function could
always remove all flags, and the callers would need to restore them.
But this would require trickier modifications of the various call
places, compared to only passing 0/1 to indicate the permanent status.

This will need to be backported to all stable versions. The issue was
at least reproduced since 3.1 (not tested before). The patch will need
to be adjusted for 3.2 and older, because a 2nd argument "thr" was
added in 3.3, so the patch will not apply to older versions as-is.

include/haproxy/connection.h
src/backend.c
src/connection.c
src/mux_fcgi.c
src/mux_h1.c
src/mux_h2.c
src/mux_quic.c
src/mux_spop.c
src/server.c
src/ssl_sock.c

index 7943ee39f866c8352d9c30bbafa5f0c492adb643..0c25e51840544d2aeda0afba1a63e1746f11519f 100644 (file)
@@ -83,7 +83,7 @@ int conn_install_mux_be(struct connection *conn, void *ctx, struct session *sess
                         const struct mux_ops *force_mux_ops);
 int conn_install_mux_chk(struct connection *conn, void *ctx, struct session *sess);
 
-void conn_delete_from_tree(struct connection *conn, int thr);
+void conn_delete_from_tree(struct connection *conn, int thr, int permanent);
 
 void conn_init(struct connection *conn, void *target);
 struct connection *conn_new(void *target);
index 0a5637b4b0243def3853b076d87a1b12e431007b..9937a7f9f0cab7dfb8bd6a1288f8461a1898bc13 100644 (file)
@@ -1319,7 +1319,7 @@ struct connection *conn_backend_get(int reuse_mode,
        HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
        conn = srv_lookup_conn(is_safe ? &srv->per_thr[tid].safe_conns : &srv->per_thr[tid].idle_conns, hash);
        if (conn)
-               conn_delete_from_tree(conn, tid);
+               conn_delete_from_tree(conn, tid, 0);
 
        /* If we failed to pick a connection from the idle list, let's try again with
         * the safe list.
@@ -1327,7 +1327,7 @@ struct connection *conn_backend_get(int reuse_mode,
        if (!conn && !is_safe && srv->curr_safe_nb > 0) {
                conn = srv_lookup_conn(&srv->per_thr[tid].safe_conns, hash);
                if (conn) {
-                       conn_delete_from_tree(conn, tid);
+                       conn_delete_from_tree(conn, tid, 0);
                        is_safe = 1;
                }
        }
@@ -1388,7 +1388,7 @@ check_tgid:
                        conn = srv_lookup_conn(tree, hash);
                        while (conn) {
                                if (conn->mux->takeover && conn->mux->takeover(conn, i, 0) == 0) {
-                                       conn_delete_from_tree(conn, i);
+                                       conn_delete_from_tree(conn, i, 0);
                                        _HA_ATOMIC_INC(&activity[tid].fd_takeover);
                                        found = 1;
                                        break;
@@ -1490,7 +1490,7 @@ takeover_random_idle_conn(struct ceb_root **root, int curtid)
        conn = ceb64_item_first(root, hash_node.node, hash_node.key, struct connection);
        while (conn) {
                if (conn->mux->takeover && conn->mux->takeover(conn, curtid, 1) == 0) {
-                       conn_delete_from_tree(conn, curtid);
+                       conn_delete_from_tree(conn, curtid, 0);
                        return conn;
                }
                conn = ceb64_item_next(root, hash_node.node, hash_node.key, conn);
@@ -1751,7 +1751,7 @@ int be_reuse_connection(int64_t hash, struct session *sess,
                        if (avail <= 1) {
                                /* no more streams available, remove it from the list */
                                HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
-                               conn_delete_from_tree(srv_conn, tid);
+                               conn_delete_from_tree(srv_conn, tid, 0);
                                HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
                        }
 
@@ -1863,7 +1863,7 @@ int connect_server(struct stream *s)
                HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
                if (!LIST_ISEMPTY(&srv->per_thr[tid].idle_conn_list)) {
                        tokill_conn = LIST_ELEM(srv->per_thr[tid].idle_conn_list.n, struct connection *, idle_list);
-                       conn_delete_from_tree(tokill_conn, tid);
+                       conn_delete_from_tree(tokill_conn, tid, 1);
                        HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
 
                        /* Release the idle lock before calling mux->destroy.
@@ -1892,7 +1892,7 @@ int connect_server(struct stream *s)
 
                                if (!LIST_ISEMPTY(&srv->per_thr[i].idle_conn_list)) {
                                        tokill_conn = LIST_ELEM(srv->per_thr[i].idle_conn_list.n, struct connection *, idle_list);
-                                       conn_delete_from_tree(tokill_conn, i);
+                                       conn_delete_from_tree(tokill_conn, i, 1);
                                }
                                HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
 
index 74b2fa1f1d0837c186589d829cf5984cc6c9b543..b4d54cca64c88ace0105d1e930418c4900adbeee 100644 (file)
@@ -74,11 +74,13 @@ struct conn_tlv_list *conn_get_tlv(struct connection *conn, int type)
 
 /* Remove <conn> idle connection from its attached tree (idle, safe or avail)
  * for the server in the connection's target and thread <thr>. If also present
- * in the secondary server idle list, conn is removed from it.
+ * in the secondary server idle list, conn is removed from it. Finally, if
+ * <permanent> is non-nul, the idle connection flags are cleared as well so
+ * that the connection is not re-inserted later.
  *
  * Must be called with idle_conns_lock held.
  */
-void conn_delete_from_tree(struct connection *conn, int thr)
+void conn_delete_from_tree(struct connection *conn, int thr, int permanent)
 {
        struct ceb_root **conn_tree;
        struct server *srv = __objt_server(conn->target);
@@ -100,6 +102,8 @@ void conn_delete_from_tree(struct connection *conn, int thr)
        }
 
        ceb64_item_delete(conn_tree, hash_node.node, hash_node.key, conn);
+       if (permanent)
+               conn->flags &= ~CO_FL_LIST_MASK;
 }
 
 int conn_create_mux(struct connection *conn, int *closed_connection)
@@ -227,7 +231,7 @@ int conn_notify_mux(struct connection *conn, int old_flags, int forced_wake)
                                        conn_in_list = 0;
                        }
                        else {
-                               conn_delete_from_tree(conn, tid);
+                               conn_delete_from_tree(conn, tid, 0);
                        }
                        HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
                }
index f287f6a7e27cdb2cd72428cc3cee0de1e3c3b860..0b97b4843947506b033e1e87b87cbb86b8d0f8f1 100644 (file)
@@ -3068,7 +3068,7 @@ struct task *fcgi_io_cb(struct task *t, void *ctx, unsigned int state)
                                        conn_in_list = 0;
                        }
                        else {
-                               conn_delete_from_tree(conn, tid);
+                               conn_delete_from_tree(conn, tid, 0);
                        }
                }
 
@@ -3329,7 +3329,7 @@ struct task *fcgi_timeout_task(struct task *t, void *context, unsigned int state
                 * to steal it from us.
                 */
                if (fconn->conn->flags & CO_FL_LIST_MASK)
-                       conn_delete_from_tree(fconn->conn, tid);
+                       conn_delete_from_tree(fconn->conn, tid, 0);
                else if (fconn->conn->flags & CO_FL_SESS_IDLE)
                        session_detach_idle_conn(fconn->conn->owner, fconn->conn);
 
index d9d63c271a168142d8049e236081d27c604c14c0..fd77700c0f268a1451a835e12f3e113bb4fa7734 100644 (file)
@@ -4335,7 +4335,7 @@ struct task *h1_io_cb(struct task *t, void *ctx, unsigned int state)
                                        conn_in_list = 0;
                        }
                        else {
-                               conn_delete_from_tree(conn, tid);
+                               conn_delete_from_tree(conn, tid, 0);
                        }
                }
 
@@ -4475,7 +4475,7 @@ struct task *h1_timeout_task(struct task *t, void *context, unsigned int state)
                 * to steal it from us.
                 */
                if (h1c->conn->flags & CO_FL_LIST_MASK)
-                       conn_delete_from_tree(h1c->conn, tid);
+                       conn_delete_from_tree(h1c->conn, tid, 1);
 
                HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
 
index ab2e5bfe9cd7cf1b42e179bfbb082238ed52719f..df59f749e6a2ede95acb7e4cdcf61e391e19954b 100644 (file)
@@ -4985,7 +4985,7 @@ struct task *h2_io_cb(struct task *t, void *ctx, unsigned int state)
                                        conn_in_list = 0;
                        }
                        else {
-                               conn_delete_from_tree(conn, tid);
+                               conn_delete_from_tree(conn, tid, 0);
                        }
                }
 
@@ -5162,7 +5162,7 @@ static int h2_process(struct h2c *h2c)
                /* connections in error must be removed from the idle lists */
                if (conn->flags & CO_FL_LIST_MASK) {
                        HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
-                       conn_delete_from_tree(conn, tid);
+                       conn_delete_from_tree(conn, tid, 1);
                        HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
                }
        }
@@ -5170,7 +5170,7 @@ static int h2_process(struct h2c *h2c)
                /* connections in error must be removed from the idle lists */
                if (conn->flags & CO_FL_LIST_MASK) {
                        HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
-                       conn_delete_from_tree(conn, tid);
+                       conn_delete_from_tree(conn, tid, 1);
                        HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
                }
        }
@@ -5278,7 +5278,7 @@ struct task *h2_timeout_task(struct task *t, void *context, unsigned int state)
                 * to steal it from us.
                 */
                if (h2c->conn->flags & CO_FL_LIST_MASK)
-                       conn_delete_from_tree(h2c->conn, tid);
+                       conn_delete_from_tree(h2c->conn, tid, 1);
                else if (h2c->conn->flags & CO_FL_SESS_IDLE)
                        session_detach_idle_conn(h2c->conn->owner, h2c->conn);
 
@@ -5360,7 +5360,7 @@ do_leave:
        /* in any case this connection must not be considered idle anymore */
        if (h2c->conn->flags & CO_FL_LIST_MASK) {
                HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
-               conn_delete_from_tree(h2c->conn, tid);
+               conn_delete_from_tree(h2c->conn, tid, 1);
                HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
        }
 
index 3d9e6b5a981d71aca667c8987c64287f6825065d..bdabfb082a2bf3ec7ad2deccf519a055892bda6a 100644 (file)
@@ -3201,7 +3201,7 @@ static void qcc_shutdown(struct qcc *qcc)
 
        /* A connection is not reusable if app layer is closed. */
        if (qcc->flags & QC_CF_IS_BACK)
-               conn_delete_from_tree(qcc->conn, tid);
+               conn_delete_from_tree(qcc->conn, tid, 1);
 
  out:
        qcc->app_st = QCC_APP_ST_SHUT;
@@ -3404,7 +3404,7 @@ struct task *qcc_io_cb(struct task *t, void *ctx, unsigned int state)
                        if (conn->flags & CO_FL_SESS_IDLE)
                                session_detach_idle_conn(conn->owner, conn);
                        else
-                               conn_delete_from_tree(conn, tid);
+                               conn_delete_from_tree(conn, tid, 0);
                }
 
                HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
@@ -3528,7 +3528,7 @@ static struct task *qcc_timeout_task(struct task *t, void *ctx, unsigned int sta
                 * attempts to steal it from us.
                 */
                if (qcc->conn->flags & CO_FL_LIST_MASK)
-                       conn_delete_from_tree(qcc->conn, tid);
+                       conn_delete_from_tree(qcc->conn, tid, 1);
                else if (qcc->conn->flags & CO_FL_SESS_IDLE)
                        session_unown_conn(qcc->conn->owner, qcc->conn);
 
index 5024078fe2c7480d57cbf90fd3c7d8f0ad567f8a..b3a43f03a68fc36955eb16343730d2a429556e32 100644 (file)
@@ -2564,7 +2564,7 @@ static struct task *spop_io_cb(struct task *t, void *ctx, unsigned int state)
                                        conn_in_list = 0;
                        }
                        else {
-                               conn_delete_from_tree(conn, tid);
+                               conn_delete_from_tree(conn, tid, 0);
                        }
                }
 
@@ -2676,7 +2676,7 @@ static int spop_process(struct spop_conn *spop_conn)
                /* connections in error must be removed from the idle lists */
                if (conn->flags & CO_FL_LIST_MASK) {
                        HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
-                       conn_delete_from_tree(conn, tid);
+                       conn_delete_from_tree(conn, tid, 1);
                        HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
                }
        }
@@ -2799,7 +2799,7 @@ static struct task *spop_timeout_task(struct task *t, void *context, unsigned in
                 * to steal it from us.
                 */
                if (spop_conn->conn->flags & CO_FL_LIST_MASK)
-                       conn_delete_from_tree(spop_conn->conn, tid);
+                       conn_delete_from_tree(spop_conn->conn, tid, 1);
                else if (spop_conn->conn->flags & CO_FL_SESS_IDLE)
                        session_detach_idle_conn(spop_conn->conn->owner, spop_conn->conn);
 
index a151566012cb40b2a04926105f8ac8ed35520961..ed49dd6e0a91b3d574f0f8fed90dbff42fd997db 100644 (file)
@@ -7212,7 +7212,7 @@ static int srv_migrate_conns_to_remove(struct server *srv, int thr, int toremove
                        break;
 
                conn = LIST_ELEM(srv->per_thr[thr].idle_conn_list.n, struct connection *, idle_list);
-               conn_delete_from_tree(conn, thr);
+               conn_delete_from_tree(conn, thr, 1);
                MT_LIST_APPEND(&idle_conns[thr].toremove_conns, &conn->toremove_list);
                i++;
        }
@@ -7287,8 +7287,7 @@ void srv_release_conn(struct server *srv, struct connection *conn)
        /* Remove the connection from any tree (safe, idle or available) */
        if (ceb_intree(&conn->hash_node.node)) {
                HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
-               conn_delete_from_tree(conn, tid);
-               conn->flags &= ~CO_FL_LIST_MASK;
+               conn_delete_from_tree(conn, tid, 1);
                HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
        }
 }
@@ -7365,7 +7364,7 @@ int srv_add_to_idle_list(struct server *srv, struct connection *conn, int is_saf
                _HA_ATOMIC_DEC(&srv->curr_used_conns);
 
                HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
-               conn_delete_from_tree(conn, tid);
+               conn_delete_from_tree(conn, tid, 0);
 
                if (is_safe) {
                        conn->flags = (conn->flags & ~CO_FL_LIST_MASK) | CO_FL_SAFE_LIST;
@@ -7525,7 +7524,7 @@ static void srv_close_idle_conns(struct server *srv)
                                                        hash_node.key, struct connection))) {
                                if (conn->ctrl->ctrl_close)
                                        conn->ctrl->ctrl_close(conn);
-                               conn_delete_from_tree(conn, i);
+                               conn_delete_from_tree(conn, i, 1);
                        }
                }
        }
index 7e621bbe134f39d36950c734ca76c1afcc26e582..4c9f10581284abd48899fc2ffb26a904b22a3adb 100644 (file)
@@ -6752,7 +6752,7 @@ struct task *ssl_sock_io_cb(struct task *t, void *context, unsigned int state)
                                        conn_in_list = 0;
                        }
                        else {
-                               conn_delete_from_tree(conn, tid);
+                               conn_delete_from_tree(conn, tid, 0);
                        }
                }