]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MEDIUM: server: add and use a separate last_change variable for internal use
authorAurelien DARRAGON <adarragon@haproxy.com>
Mon, 30 Jun 2025 08:57:29 +0000 (10:57 +0200)
committerAurelien DARRAGON <adarragon@haproxy.com>
Mon, 30 Jun 2025 14:26:25 +0000 (16:26 +0200)
last_change server metric is used for 2 separate purposes. First it is
used to report last server state change date for stats and other related
metrics. But it is also used internally, including in sensitive paths,
such as lb related stuff to take decision or perform computations
(ie: in srv_dynamic_maxconn()).

Due to last_change counter now being split over thread groups since 16eb0fa
("MAJOR: counters: dispatch counters over thread groups"), reading the
aggregated value has a cost, and we cannot afford to consult last_change
value from srv_dynamic_maxconn() anymore. Moreover, since the value is
used to take decision for the current process we don't wan't the variable
to be updated by another process in our back.

To prevent performance regression and sharing issues, let's instead add a
separate srv->last_change value, which is not updated atomically (given how
rare the  updates are), and only serves for places where the use of the
aggregated last_change counter/stats (split over thread groups) is too
costly.

include/haproxy/server-t.h
src/check.c
src/proxy.c
src/queue.c
src/server.c
src/server_state.c

index c1e26113088c3d357e99d7b3d700f9f4f9da7016..57642a0312b28f4292a0e4c9fcd53f637da68fe5 100644 (file)
@@ -355,6 +355,7 @@ struct server {
        short onmarkedup;                       /* what to do when marked up: one of HANA_ONMARKEDUP_* */
        int slowstart;                          /* slowstart time in seconds (ms in the conf) */
        int idle_ping;                          /* MUX idle-ping interval in ms */
+       unsigned long last_change;              /* internal use only (not for stats purpose): last time the server state was changed, doesn't change often, not updated atomically on purpose */
 
        char *id;                               /* just for identification */
        uint32_t rid;                           /* revision: if id has been reused for a new server, rid won't match */
index 2689eddc07cd6d864441f09e82674494d3fa1746..f717d52289a30b15809bb40d417b1b7bbd8e3aaf 100644 (file)
@@ -994,7 +994,7 @@ int httpchk_build_status_header(struct server *s, struct buffer *buf)
                                      "UP %d/%d", "UP",
                                      "NOLB %d/%d", "NOLB",
                                      "no check" };
-       unsigned long last_change = COUNTERS_SHARED_LAST(s->counters.shared->tg, last_change);
+       unsigned long last_change = s->last_change;
 
        if (!(s->check.state & CHK_ST_ENABLED))
                sv_state = 6;
index 6294005bd50b84a0719c1d02246ecf14ed1bfcef..5e09641a35e89e2b8a5c916347a30b5c0def5e01 100644 (file)
@@ -2972,7 +2972,7 @@ static int dump_servers_state(struct appctx *appctx)
                dump_server_addr(&srv->check.addr, srv_check_addr);
                dump_server_addr(&srv->agent.addr, srv_agent_addr);
 
-               srv_time_since_last_change = ns_to_sec(now_ns) - COUNTERS_SHARED_LAST(srv->counters.shared->tg, last_change);
+               srv_time_since_last_change = ns_to_sec(now_ns) - srv->last_change;
                bk_f_forced_id = px->options & PR_O_FORCED_ID ? 1 : 0;
                srv_f_forced_id = srv->flags & SRV_F_FORCED_ID ? 1 : 0;
 
index c57bbe690123c594ee7945acb1295ac09e5d5a24..7f06ac9051c9bd694cc86413cef7cff9fb89df55 100644 (file)
@@ -104,7 +104,6 @@ DECLARE_POOL(pool_head_pendconn, "pendconn", sizeof(struct pendconn));
 unsigned int srv_dynamic_maxconn(const struct server *s)
 {
        unsigned int max;
-       unsigned long last_change;
 
        if (s->proxy->beconn >= s->proxy->fullconn)
                /* no fullconn or proxy is full */
@@ -115,13 +114,11 @@ unsigned int srv_dynamic_maxconn(const struct server *s)
        else max = MAX(s->minconn,
                       s->proxy->beconn * s->maxconn / s->proxy->fullconn);
 
-       last_change = COUNTERS_SHARED_LAST(s->counters.shared->tg, last_change);
-
        if ((s->cur_state == SRV_ST_STARTING) &&
-           ns_to_sec(now_ns) < last_change + s->slowstart &&
-           ns_to_sec(now_ns) >= last_change) {
+           ns_to_sec(now_ns) < s->last_change + s->slowstart &&
+           ns_to_sec(now_ns) >= s->last_change) {
                unsigned int ratio;
-               ratio = 100 * (ns_to_sec(now_ns) - last_change) / s->slowstart;
+               ratio = 100 * (ns_to_sec(now_ns) - s->last_change) / s->slowstart;
                max = MAX(1, max * ratio / 100);
        }
        return max;
index 697b9dcc5002031ac9263633e8be71b493f7a3e1..3fc1cd9979bb82144904d341f7872a89953403a4 100644 (file)
@@ -144,12 +144,10 @@ const char *srv_op_st_chg_cause(enum srv_op_st_chg_cause cause)
 
 int srv_downtime(const struct server *s)
 {
-       unsigned long last_change = COUNTERS_SHARED_LAST(s->counters.shared->tg, last_change);
-
-       if ((s->cur_state != SRV_ST_STOPPED) || last_change >= ns_to_sec(now_ns))               // ignore negative time
+       if ((s->cur_state != SRV_ST_STOPPED) || s->last_change >= ns_to_sec(now_ns))            // ignore negative time
                return s->down_time;
 
-       return ns_to_sec(now_ns) - last_change + s->down_time;
+       return ns_to_sec(now_ns) - s->last_change + s->down_time;
 }
 
 int srv_getinter(const struct check *check)
@@ -2460,11 +2458,10 @@ INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws);
  */
 void server_recalc_eweight(struct server *sv, int must_update)
 {
-       unsigned long last_change = COUNTERS_SHARED_LAST(sv->counters.shared->tg, last_change);
        struct proxy *px = sv->proxy;
        unsigned w;
 
-       if (ns_to_sec(now_ns) < last_change || ns_to_sec(now_ns) >= last_change + sv->slowstart) {
+       if (ns_to_sec(now_ns) < sv->last_change || ns_to_sec(now_ns) >= sv->last_change + sv->slowstart) {
                /* go to full throttle if the slowstart interval is reached unless server is currently down */
                if ((sv->cur_state != SRV_ST_STOPPED) && (sv->next_state == SRV_ST_STARTING))
                        sv->next_state = SRV_ST_RUNNING;
@@ -2476,7 +2473,7 @@ void server_recalc_eweight(struct server *sv, int must_update)
        if ((sv->cur_state == SRV_ST_STOPPED) && (sv->next_state == SRV_ST_STARTING) && (px->lbprm.algo & BE_LB_PROP_DYN))
                w = 1;
        else if ((sv->next_state == SRV_ST_STARTING) && (px->lbprm.algo & BE_LB_PROP_DYN))
-               w = (px->lbprm.wdiv * (ns_to_sec(now_ns) - last_change) + sv->slowstart) / sv->slowstart;
+               w = (px->lbprm.wdiv * (ns_to_sec(now_ns) - sv->last_change) + sv->slowstart) / sv->slowstart;
        else
                w = px->lbprm.wdiv;
 
@@ -3047,6 +3044,7 @@ struct server *new_server(struct proxy *proxy)
        srv->rid = 0; /* rid defaults to 0 */
 
        srv->next_state = SRV_ST_RUNNING; /* early server setup */
+       srv->last_change = ns_to_sec(now_ns);
 
        srv->check.obj_type = OBJ_TYPE_CHECK;
        srv->check.status = HCHK_STATUS_INI;
@@ -5782,7 +5780,7 @@ static int srv_alloc_lb(struct server *sv, struct proxy *be)
 
 /* updates the server's weight during a warmup stage. Once the final weight is
  * reached, the task automatically stops. Note that any server status change
- * must have updated s->counters.last_change accordingly.
+ * must have updated server last_change accordingly.
  */
 static struct task *server_warmup(struct task *t, void *context, unsigned int state)
 {
@@ -5838,7 +5836,7 @@ static int init_srv_slowstart(struct server *srv)
                if (srv->next_state == SRV_ST_STARTING) {
                        task_schedule(srv->warmup,
                                      tick_add(now_ms,
-                                              MS_TO_TICKS(MAX(1000, (ns_to_sec(now_ns) - COUNTERS_SHARED_LAST(srv->counters.shared->tg, last_change))) / 20)));
+                                              MS_TO_TICKS(MAX(1000, (ns_to_sec(now_ns) - srv->last_change)) / 20)));
                }
        }
 
@@ -7035,11 +7033,9 @@ static void srv_update_status(struct server *s, int type, int cause)
        /* check if server stats must be updated due the the server state change */
        if (srv_prev_state != s->cur_state) {
                if (srv_prev_state == SRV_ST_STOPPED) {
-                       unsigned long last_change = COUNTERS_SHARED_LAST(s->counters.shared->tg, last_change);
-
                        /* server was down and no longer is */
-                       if (last_change < ns_to_sec(now_ns))                        // ignore negative times
-                               s->down_time += ns_to_sec(now_ns) - last_change;
+                       if (s->last_change < ns_to_sec(now_ns))                        // ignore negative times
+                               s->down_time += ns_to_sec(now_ns) - s->last_change;
                        _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_UP, cb_data.common, s);
                }
                else if (s->cur_state == SRV_ST_STOPPED) {
@@ -7056,6 +7052,7 @@ static void srv_update_status(struct server *s, int type, int cause)
                        HA_ATOMIC_STORE(&s->proxy->ready_srv, NULL);
 
                HA_ATOMIC_STORE(&s->counters.shared->tg[tgid - 1]->last_change, ns_to_sec(now_ns));
+               s->last_change = ns_to_sec(now_ns);
 
                /* publish the state change */
                _srv_event_hdl_prepare_state(&cb_data.state,
index 6c9b19a0854fef8d3dacf164856174a03fb0add1..ee1f51fcb1b5dba2de399ddd58f1741bf51916bd 100644 (file)
@@ -322,6 +322,7 @@ static void srv_state_srv_update(struct server *srv, int version, char **params)
        }
 
        HA_ATOMIC_STORE(&srv->counters.shared->tg[0]->last_change, ns_to_sec(now_ns) - srv_last_time_change);
+       srv->last_change = ns_to_sec(now_ns) - srv_last_time_change;
        srv->check.status = srv_check_status;
        srv->check.result = srv_check_result;