From ddfe0743d8986a1ff71cba5e5ac863e69a4e8e2a Mon Sep 17 00:00:00 2001
From: Willy Tarreau
Date: Mon, 29 Jun 2020 20:55:53 +0200
Subject: [PATCH] MEDIUM: server: use the two thresholds for the connection
 release algorithm

The algorithm improvement in bdb86bd ("MEDIUM: server: improve estimate
of the need for idle connections") is still not enough, because the FD
count acts as a hard cut-off between the two behaviors, so we still end
up killing many connections.

Here we proceed differently. Given that there are two configured
limits, a low and a high one, we drop the connection when the high
limit is reached (which the killing task already does anyway); between
the low and the high threshold, we only keep the connection if our idle
entries are empty (with a preference for safe ones); and below the low
threshold, we keep any connection so as to give it a chance of being
reused or taken over by another thread.

Proceeding like this results in far fewer dropped connections: we
typically see a 99.3% reuse rate (76k conns for 10M requests over 200
servers and 4 threads, with 335k takeovers, or 3%), and much smaller
CPU usage variations because there are no more bursts trying to kill
extra connections.

It should be possible to further improve this by counting the number of
threads using a server and trying to balance the amount of per-thread
idle connections approximately evenly among the threads.
---
 include/haproxy/server.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/include/haproxy/server.h b/include/haproxy/server.h
index 75a5b27f3a..1fa1b2d4db 100644
--- a/include/haproxy/server.h
+++ b/include/haproxy/server.h
@@ -250,14 +250,17 @@ static inline int srv_add_to_idle_list(struct server *srv, struct connection *co
 	 * last purge, or if we already don't have idle conns for the
 	 * current thread and we don't exceed last count by global.nbthread.
 	 */
-	if (srv && srv->pool_purge_delay > 0 &&
-	    (srv->max_idle_conns == -1 || srv->max_idle_conns > srv->curr_idle_conns) &&
-	    (srv->curr_used_conns + srv->curr_idle_conns < MAX(srv->curr_used_conns, srv->est_need_conns) +
-	     (MT_LIST_ISEMPTY(&srv->safe_conns[tid]) && MT_LIST_ISEMPTY(&srv->idle_conns[tid])) ? global.nbthread : 1) &&
-	    !(conn->flags & CO_FL_PRIVATE) &&
+	if (!(conn->flags & CO_FL_PRIVATE) &&
+	    srv && srv->pool_purge_delay > 0 &&
 	    ((srv->proxy->options & PR_O_REUSE_MASK) != PR_O_REUSE_NEVR) &&
-	    !conn->mux->used_streams(conn) && conn->mux->avail_streams(conn) &&
-	    ha_used_fds < global.tune.pool_low_count) {
+	    ha_used_fds < global.tune.pool_high_count &&
+	    (srv->max_idle_conns == -1 || srv->max_idle_conns > srv->curr_idle_conns) &&
+	    ((ha_used_fds < global.tune.pool_low_count &&
+	      MT_LIST_ISEMPTY(&srv->safe_conns[tid]) &&
+	      (is_safe || MT_LIST_ISEMPTY(&srv->idle_conns[tid]))) ||
+	     (srv->curr_used_conns + srv->curr_idle_conns <
+	      MAX(srv->curr_used_conns, srv->est_need_conns) + global.nbthread)) &&
+	    !conn->mux->used_streams(conn) && conn->mux->avail_streams(conn)) {
 		int retadd;
 
 		retadd = _HA_ATOMIC_ADD(&srv->curr_idle_conns, 1);
-- 
2.39.5
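
For readers who want to trace the new condition outside of the HAProxy tree, here is a minimal standalone C sketch of the two-threshold keep/drop decision introduced by this patch. It is not part of the patch itself: the struct, the field names and the function `conn_keep_decision()` are hypothetical stand-ins (e.g. `fd_low_limit`/`fd_high_limit` play the role of `global.tune.pool_low_count`/`pool_high_count`), and the private/reuse/mux checks that precede the threshold logic in the real code are omitted.

```c
/* Standalone sketch of the two-threshold connection release decision.
 * Hypothetical names; only the shape of the condition follows the patch. */
#include <stdio.h>
#include <stdbool.h>

enum keep_decision { DROP_CONN, KEEP_CONN };

struct idle_state {
	int used_fds;        /* current FD count (ha_used_fds)                  */
	int fd_low_limit;    /* low FD threshold (pool_low_count)               */
	int fd_high_limit;   /* high FD threshold (pool_high_count)             */
	bool my_safe_empty;  /* this thread's safe idle list is empty           */
	bool my_idle_empty;  /* this thread's regular idle list is empty        */
	bool conn_is_safe;   /* the connection qualifies as a "safe" one        */
	int used_conns;      /* srv->curr_used_conns                            */
	int idle_conns;      /* srv->curr_idle_conns                            */
	int est_need_conns;  /* srv->est_need_conns                             */
	int nbthread;        /* global.nbthread                                 */
};

static enum keep_decision conn_keep_decision(const struct idle_state *s)
{
	/* At or above the high threshold: never keep the connection,
	 * the purge task would kill it anyway. */
	if (s->used_fds >= s->fd_high_limit)
		return DROP_CONN;

	/* Below the low threshold: keep the connection when this thread has
	 * no idle entries yet (safe list empty, and the regular idle list
	 * empty too unless the connection itself is a safe one), so that it
	 * gets a chance to be reused or taken over by another thread. */
	if (s->used_fds < s->fd_low_limit &&
	    s->my_safe_empty &&
	    (s->conn_is_safe || s->my_idle_empty))
		return KEEP_CONN;

	/* Otherwise (typically between the two thresholds), keep it only
	 * while used+idle stays below the estimated need plus one extra
	 * connection per thread. */
	int need = s->used_conns > s->est_need_conns ? s->used_conns : s->est_need_conns;
	if (s->used_conns + s->idle_conns < need + s->nbthread)
		return KEEP_CONN;

	return DROP_CONN;
}

int main(void)
{
	struct idle_state s = {
		.used_fds = 500, .fd_low_limit = 1000, .fd_high_limit = 2000,
		.my_safe_empty = true, .my_idle_empty = true, .conn_is_safe = false,
		.used_conns = 10, .idle_conns = 2, .est_need_conns = 8, .nbthread = 4,
	};

	printf("decision: %s\n",
	       conn_keep_decision(&s) == KEEP_CONN ? "keep" : "drop");
	return 0;
}
```

With the sample numbers above the FD count is below the low threshold and the per-thread lists are empty, so the connection is kept; raising `used_fds` to 2000 or filling the lists while between the thresholds flips the decision, which mirrors the behavior described in the commit message.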