tune.max-checks-per-thread <number>
Sets the number of active checks per thread above which a thread will
- actively try to search a less loaded thread to run the health check. The
+ actively try to search for a less loaded thread to run the health check, or
+ queue it until the number of active checks running on it diminishes. The
default value is zero, meaning no such limit is set. It may be needed in
certain environments running an extremely large number of expensive checks
with many threads, when the load appears unequal and causes health checks
to randomly time out on startup, typically when using OpenSSL 3.0, which is
about 20 times more CPU-intensive on health checks than older releases.
Setting this limit helps level the health check work across all threads. The
- vast majority of configurations do not need to touch this parameter.
+ vast majority of configurations do not need to touch this parameter. Please
+ note that values that are too low may significantly slow down health
+ checking when the checks themselves are slow to execute.
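For example (the value below is purely illustrative, not a recommendation),
a setup overwhelmed by many expensive checks could cap per-thread check
concurrency like this:

    global
        tune.max-checks-per-thread 10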
tune.maxaccept <number>
Sets the maximum number of consecutive connections a process may accept in a
are started with a small time offset between them. It is also possible to
add some random noise in the agent and health checks interval using the
global "spread-checks" keyword. This makes sense for instance when a lot
- of backends use the same servers.
+ of backends use the same servers. The global "tune.max-checks-per-thread"
+ setting, if set to a non-zero value, limits the number of concurrent checks
+ being performed at once on any given thread. To achieve this, haproxy queues
+ the checks that were about to start on a thread that has reached this limit,
+ until another check finishes. The effect is to extend the effective check
+ interval. In such a case, reducing the "inter" setting has very limited
+ effect since it cannot reduce the time spent waiting in the queue.
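As an illustration (all values below are arbitrary), combining interval noise
with the per-thread limit could look like this:

    global
        spread-checks 5
        tune.max-checks-per-thread 10

    backend app
        server srv1 192.0.2.10:80 check inter 2s

With such a limit in place, a check that had to wait in a thread's queue will
effectively run less often than once every 2 seconds, no matter how small
"inter" is made.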
log-proto <logproto>
The "log-proto" specifies the protocol used to forward event messages to
/* 4 possible states for CHK_ST_SLEEPING and CHK_ST_READY:
* SLP RDY State Description
- * 0 0 - (reserved)
+ * 0 0 QUEUED Check is in queue due to concurrency limit
* 0 1 RUNNING Check is bound to current thread and running
* 1 0 SLEEPING Check is sleeping, not bound to a thread
* 1 1 MIGRATING Check is migrating to another thread
char *alpn_str; /* ALPN to use for checks */
int alpn_len; /* ALPN string length */
const struct mux_proto_list *mux_proto; /* the mux to use for all outgoing connections (specified by the "proto" keyword) */
+ struct list check_queue; /* entry in the check queue. Not empty = in queue. */
int via_socks4; /* check the connection via socks4 proxy */
};
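/* Illustration only (hypothetical helpers, not part of the patch), assuming
 * CHK_ST_SLEEPING and CHK_ST_READY are independent bits of check->state and
 * that check_queue is kept initialized with LIST_INIT()/LIST_DEL_INIT():
 */
static inline const char *check_sched_state_str(const struct check *check)
{
	int slp = !!(check->state & CHK_ST_SLEEPING);
	int rdy = !!(check->state & CHK_ST_READY);

	if (!slp && !rdy)
		return "QUEUED";    /* waiting for a per-thread slot */
	if (!slp && rdy)
		return "RUNNING";   /* bound to the current thread and running */
	if (slp && !rdy)
		return "SLEEPING";  /* sleeping, not bound to a thread */
	return "MIGRATING";         /* migrating to another thread */
}

/* thanks to the "not empty = in queue" convention above, no extra flag is
 * needed to know whether a check currently sits in a thread's queue:
 */
static inline int check_is_queued(struct check *check)
{
	return !LIST_ISEMPTY(&check->check_queue);
}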
struct list streams; /* list of streams attached to this thread */
struct list quic_conns; /* list of active quic-conns attached to this thread */
struct list quic_conns_clo; /* list of closing quic-conns attached to this thread */
+ struct list queued_checks; /* checks waiting for a connection slot */
ALWAYS_ALIGN(2*sizeof(void*));
struct list tasklets[TL_CLASSES]; /* tasklets (and/or tasks) to run, by class */
task_set_thread(t, tid);
check->state &= ~CHK_ST_SLEEPING;
+ /* if we just woke up and the thread already runs its maximum number of
+ * checks, or already has others waiting, we might have to wait in the
+ * queue (for health checks only). This means !SLEEPING && !READY.
+ */
+ if (check->server &&
+ (!LIST_ISEMPTY(&th_ctx->queued_checks) ||
+ (global.tune.max_checks_per_thread &&
+ _HA_ATOMIC_LOAD(&th_ctx->running_checks) >= global.tune.max_checks_per_thread))) {
+ TRACE_DEVEL("health-check queued", CHK_EV_TASK_WAKE, check);
+ t->expire = TICK_ETERNITY;
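+ /* append at the tail; the dequeue side takes from the head, keeping FIFO order */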
+ LIST_APPEND(&th_ctx->queued_checks, &check->check_queue);
+
+ /* reset fastinter flag (if set) so that srv_getinter()
+ * only returns fastinter if server health is degraded
+ */
+ check->state &= ~CHK_ST_FASTINTER;
+ goto out_leave;
+ }
+
/* OK let's run, now we cannot roll back anymore */
check->state |= CHK_ST_READY;
activity[tid].check_started++;
check->state |= CHK_ST_SLEEPING;
update_timer:
+ /* when going to sleep, we need to check if other checks are waiting
+ * for a slot. If so we pick the first one from the queue and wake it up.
+ */
+ if (check->server && (check->state & CHK_ST_SLEEPING)) {
+ if (!LIST_ISEMPTY(&th_ctx->queued_checks) &&
+ _HA_ATOMIC_LOAD(&th_ctx->running_checks) < global.tune.max_checks_per_thread) {
+ struct check *next_chk = LIST_ELEM(th_ctx->queued_checks.n, struct check *, check_queue);
+
+ /* wake up pending task */
+ LIST_DEL_INIT(&next_chk->check_queue);
+
+ activity[tid].check_started++;
+ _HA_ATOMIC_INC(&th_ctx->running_checks);
+ next_chk->state |= CHK_ST_READY;
+ /* now running */
+ task_wakeup(next_chk->task, TASK_WOKEN_RES);
+ }
+ }
+
if (check->server) {
rv = 0;
if (global.spread_checks > 0) {
if (check->server)
HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock);
+ out_leave:
TRACE_LEAVE(CHK_EV_TASK_WAKE, check);
/* Free the check if set to PURGE. After this, the check instance may be
check->bi = BUF_NULL;
check->bo = BUF_NULL;
LIST_INIT(&check->buf_wait.list);
+ LIST_INIT(&check->check_queue);
return NULL;
}
REGISTER_SERVER_DEINIT(deinit_srv_check);
REGISTER_SERVER_DEINIT(deinit_srv_agent_check);
+/* perform minimal initializations */
+static void init_checks()
+{
+ int i;
+
+ for (i = 0; i < MAX_THREADS; i++)
+ LIST_INIT(&ha_thread_ctx[i].queued_checks);
+}
+
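+/* registered at the STG_PREPARE init stage so that every per-thread queue is
+ * a valid empty list before any check may be enqueued.
+ */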
+INITCALL0(STG_PREPARE, init_checks);
/**************************************************************************/
/************************** Check sample fetches **************************/