From: Willy Tarreau Date: Tue, 11 Feb 2025 16:18:36 +0000 (+0100) Subject: MEDIUM: server: allocate a tasklet for asynchronous requeuing X-Git-Tag: v3.2-dev6~43 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b6a8318cc2f874961c3d316f9354f673de84a851;p=thirdparty%2Fhaproxy.git MEDIUM: server: allocate a tasklet for asynchronous requeuing This creates a tasklet that only expects to be called when the LB algorithm is under contention when trying to reposition the server in its tree. Indeed, that's one of the operations that usually requires taking a write lock on a highly contended area, often for very little benefit under contention; indeed, under load, if a server keeps its previous position for a few extra microseconds, usually there's no harm. Thus this new tasklet can be woken up by the LB algo to ask the server to later call lbprm.server_requeue(). It does nothing else. --- diff --git a/include/haproxy/server-t.h b/include/haproxy/server-t.h index d75171d93..ad7315da7 100644 --- a/include/haproxy/server-t.h +++ b/include/haproxy/server-t.h @@ -395,6 +395,7 @@ struct server { struct eb_root *lb_tree; /* we want to know in what tree the server is */ struct tree_occ *lb_nodes; /* lb_nodes_tot * struct tree_occ */ + struct tasklet *requeue_tasklet; /* tasklet to call to asynchronously requeue the server */ unsigned lb_nodes_tot; /* number of allocated lb_nodes (C-HASH) */ unsigned lb_nodes_now; /* number of lb_nodes placed in the tree (C-HASH) */ enum srv_hash_key hash_key; /* method to compute node hash (C-HASH) */ diff --git a/src/server.c b/src/server.c index 82f237dc8..1fab124d9 100644 --- a/src/server.c +++ b/src/server.c @@ -2442,6 +2442,22 @@ void server_recalc_eweight(struct server *sv, int must_update) srv_update_status(sv, 0, SRV_OP_STCHGC_NONE); } +/* requeuing tasklet used to asynchronously queue the server into its tree in + * case of extreme contention. It is woken up by the code that failed to grab + * an important lock. 
+ */ +struct task *server_requeue(struct task *t, void *context, unsigned int state) +{ + struct server *srv = context; + + /* let's call the LB's requeue function. If it fails, it will itself + * wake us up. + */ + if (srv->proxy->lbprm.server_requeue) + srv->proxy->lbprm.server_requeue(srv); + return t; +} + /* * Parses weight_str and configures sv accordingly. * Returns NULL on success, error message string otherwise. @@ -3072,6 +3088,8 @@ struct server *srv_drop(struct server *srv) guid_remove(&srv->guid); + if (srv->requeue_tasklet) + tasklet_kill(srv->requeue_tasklet); task_destroy(srv->warmup); task_destroy(srv->srvrq_check); @@ -5691,6 +5709,24 @@ static int init_srv_slowstart(struct server *srv) } REGISTER_POST_SERVER_CHECK(init_srv_slowstart); + +/* allocate the tasklet that's meant to permit a server to be requeued asynchronously, and bind it to server_requeue() with <srv> as its context. Returns ERR_NONE on success, or ERR_ALERT | ERR_FATAL after emitting an alert if the tasklet cannot be allocated. */ +static int init_srv_requeue(struct server *srv) +{ + struct tasklet *t; + + if ((t = tasklet_new()) == NULL) { + ha_alert("Cannot allocate a server requeuing tasklet for server %s/%s: out of memory.\n", srv->proxy->id, srv->id); + return ERR_ALERT | ERR_FATAL; + } + + srv->requeue_tasklet = t; + t->process = server_requeue; + t->context = srv; + return ERR_NONE; +} +REGISTER_POST_SERVER_CHECK(init_srv_requeue); + /* Memory allocation and initialization of the per_thr field. * Returns 0 if the field has been successfully initialized, -1 on failure. */ @@ -5870,6 +5906,9 @@ static int cli_parse_add_server(char **args, char *payload, struct appctx *appct if (init_srv_slowstart(srv)) goto out; + if (init_srv_requeue(srv) != 0) + goto out; + /* Attach the server to the end of the proxy linked list. Note that this * operation is not thread-safe so this is executed under thread * isolation.