From: Aurelien DARRAGON Date: Tue, 18 Apr 2023 11:52:27 +0000 (+0200) Subject: BUG/MINOR: server: don't miss server stats update on server state transitions X-Git-Tag: v2.8-dev8~56 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9f5853fa385fc9763fbcedfeecf07d51e9b5f60c;p=thirdparty%2Fhaproxy.git BUG/MINOR: server: don't miss server stats update on server state transitions s->last_change and s->down_time were manually updated for each effective server state change within srv_update_status(). This is rather error-prone, and as a result there were still some state transitions that were not handled properly since at least 1.8. i.e.: - when transitioning from DRAIN to READY: downtime was updated (which is wrong since a server in DRAIN state should not be considered as DOWN) - when transitioning from MAINT to READY: downtime was not updated (this can be easily seen in the html stats page) To fix these all at once, and prevent similar bugs from being introduced, we centralize the server last_change and down_time stats logic at the end of srv_update_status(): If the server state changed during the call, then it means that last_change must be updated, with a special case when changing from STOPPED state, which means the server was previously DOWN and thus downtime should be updated. This patch depends on: - "MINOR: server: explicitly commit state change in srv_update_status()" This could be backported to all stable versions. 
--- diff --git a/src/server.c b/src/server.c index 1d1e15216c..65f24d1ae6 100644 --- a/src/server.c +++ b/src/server.c @@ -5287,6 +5287,7 @@ static void srv_update_status(struct server *s) struct proxy *px = s->proxy; int prev_srv_count = s->proxy->srv_bck + s->proxy->srv_act; int srv_was_stopping = (s->cur_state == SRV_ST_STOPPING) || (s->cur_admin & SRV_ADMF_DRAIN); + enum srv_state srv_prev_state = s->cur_state; int log_level; struct buffer *tmptrash = NULL; @@ -5301,7 +5302,6 @@ static void srv_update_status(struct server *s) s->next_admin = s->cur_admin; if ((s->cur_state != SRV_ST_STOPPED) && (s->next_state == SRV_ST_STOPPED)) { - s->last_change = now.tv_sec; if (s->proxy->lbprm.set_server_status_down) s->proxy->lbprm.set_server_status_down(s); @@ -5338,7 +5338,6 @@ static void srv_update_status(struct server *s) s->counters.down_trans++; } else if ((s->cur_state != SRV_ST_STOPPING) && (s->next_state == SRV_ST_STOPPING)) { - s->last_change = now.tv_sec; if (s->proxy->lbprm.set_server_status_down) s->proxy->lbprm.set_server_status_down(s); @@ -5366,10 +5365,6 @@ static void srv_update_status(struct server *s) else if (((s->cur_state != SRV_ST_RUNNING) && (s->next_state == SRV_ST_RUNNING)) || ((s->cur_state != SRV_ST_STARTING) && (s->next_state == SRV_ST_STARTING))) { - if (s->cur_state == SRV_ST_STOPPED && s->last_change < now.tv_sec) // ignore negative times - s->down_time += now.tv_sec - s->last_change; - - s->last_change = now.tv_sec; if (s->next_state == SRV_ST_STARTING && s->warmup) task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20)))); @@ -5471,7 +5466,6 @@ static void srv_update_status(struct server *s) } else { /* server was still running */ check->health = 0; /* failure */ - s->last_change = now.tv_sec; s->next_state = SRV_ST_STOPPED; if (s->proxy->lbprm.set_server_status_down) @@ -5540,7 +5534,6 @@ static void srv_update_status(struct server *s) s->next_state = SRV_ST_STOPPING; } else { - s->last_change = now.tv_sec; 
s->next_state = SRV_ST_STARTING; if (s->slowstart > 0) { if (s->warmup) @@ -5679,7 +5672,6 @@ static void srv_update_status(struct server *s) if (!(s->cur_admin & SRV_ADMF_DRAIN) && (s->next_admin & SRV_ADMF_DRAIN)) { /* drain state is applied only if not yet in maint */ - s->last_change = now.tv_sec; if (px->lbprm.set_server_status_down) px->lbprm.set_server_status_down(s); @@ -5710,10 +5702,6 @@ static void srv_update_status(struct server *s) } else if ((s->cur_admin & SRV_ADMF_DRAIN) && !(s->next_admin & SRV_ADMF_DRAIN)) { /* OK completely leaving drain mode */ - - if (s->last_change < now.tv_sec) // ignore negative times - s->down_time += now.tv_sec - s->last_change; - s->last_change = now.tv_sec; server_recalc_eweight(s, 0); tmptrash = alloc_trash_chunk(); @@ -5793,6 +5781,16 @@ static void srv_update_status(struct server *s) */ srv_lb_commit_status(s); + /* check if server stats must be updated due the the server state change */ + if (srv_prev_state != s->cur_state) { + if (srv_prev_state == SRV_ST_STOPPED) { + /* server was down and no longer is */ + if (s->last_change < now.tv_sec) // ignore negative times + s->down_time += now.tv_sec - s->last_change; + } + s->last_change = now.tv_sec; + } + /* check if backend stats must be updated due to the server state change */ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) set_backend_down(s->proxy); /* backend going down */