From: Emeric Brun Date: Thu, 19 Oct 2017 12:42:30 +0000 (+0200) Subject: BUG/MEDIUM: log: check result details truncated. X-Git-Tag: v1.8-dev3~24 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5a13351;p=thirdparty%2Fhaproxy.git BUG/MEDIUM: log: check result details truncated. Fix regression introduced by commit: 'MAJOR: servers: propagate server status changes asynchronously.' The building of the log line was re-worked to be done at the postponed point without lack of data. [wt: this only affects 1.8-dev, no backport needed] --- diff --git a/include/proto/server.h b/include/proto/server.h index 1a649f50d8..b521995d9d 100644 --- a/include/proto/server.h +++ b/include/proto/server.h @@ -152,45 +152,11 @@ void srv_shutdown_streams(struct server *srv, int why); */ void srv_shutdown_backup_streams(struct proxy *px, int why); -/* Appends some information to a message string related to a server going UP or - * DOWN. If both and are null and the server tracks another - * one, a "via" information will be provided to know where the status came from. - * If is non-null, the entire string will be appended after a comma and - * a space (eg: to report some information from the check that changed the state). - * If is non-negative, some information about requeued sessions are - * provided. - */ -void srv_append_status(struct chunk *msg, struct server *s, const char *reason, int xferred, int forced); - -/* Marks server down, regardless of its checks' statuses, notifies by all - * available means, recounts the remaining servers on the proxy and transfers - * queued sessions whenever possible to other servers. It automatically - * recomputes the number of servers, but not the map. Maintenance servers are - * ignored. It reports if non-null as the reason for going down. Note - * that it makes use of the trash to build the log strings, so must - * not be placed there. - */ -void srv_set_stopped(struct server *s, const char *reason); - -/* Marks server up regardless of its checks' statuses and provided it isn't - * in maintenance. Notifies by all available means, recounts the remaining - * servers on the proxy and tries to grab requests from the proxy. It - * automatically recomputes the number of servers, but not the map. Maintenance - * servers are ignored. It reports if non-null as the reason for going - * up. Note that it makes use of the trash to build the log strings, so - * must not be placed there. - */ -void srv_set_running(struct server *s, const char *reason); - -/* Marks server stopping regardless of its checks' statuses and provided it - * isn't in maintenance. Notifies by all available means, recounts the remaining - * servers on the proxy and tries to grab requests from the proxy. It - * automatically recomputes the number of servers, but not the map. Maintenance - * servers are ignored. It reports if non-null as the reason for going - * up. Note that it makes use of the trash to build the log strings, so - * must not be placed there. - */ -void srv_set_stopping(struct server *s, const char *reason); +void srv_append_status(struct chunk *msg, struct server *s, struct check *, int xferred, int forced); + +void srv_set_stopped(struct server *s, const char *reason, struct check *check); +void srv_set_running(struct server *s, const char *reason, struct check *check); +void srv_set_stopping(struct server *s, const char *reason, struct check *check); /* Enables admin flag (among SRV_ADMF_*) on server . This is used to * enforce either maint mode or drain mode. It is not allowed to set more than diff --git a/include/types/server.h b/include/types/server.h index 75054a00e2..cdc33592cc 100644 --- a/include/types/server.h +++ b/include/types/server.h @@ -300,7 +300,11 @@ struct server { } tmpl_info; struct dns_srvrq *srvrq; /* Pointer representing the DNS SRV requeest, if any */ struct list update_status; /* to attach to list of servers chnaging status */ - char op_st_chg_reason[48]; /* operational status change's reason */ + struct { + char reason[128]; + short status, code; + long duration; + } op_st_chg; /* operational status change's reason */ char adm_st_chg_cause[48]; /* adminstrative status change's cause */ }; diff --git a/src/checks.c b/src/checks.c index 413365b242..48e2bb94b5 100644 --- a/src/checks.c +++ b/src/checks.c @@ -183,41 +183,6 @@ const char *get_analyze_status(short analyze_status) { return analyze_statuses[HANA_STATUS_UNKNOWN].desc; } -/* Builds a string containing some information about the health check's result. - * The output string is allocated from the trash chunks. If the check is NULL, - * NULL is returned. This is designed to be used when emitting logs about health - * checks. - */ -static const char *check_reason_string(struct check *check) -{ - struct chunk *msg; - - if (!check) - return NULL; - - msg = get_trash_chunk(); - chunk_printf(msg, "reason: %s", get_check_status_description(check->status)); - - if (check->status >= HCHK_STATUS_L57DATA) - chunk_appendf(msg, ", code: %d", check->code); - - if (*check->desc) { - struct chunk src; - - chunk_appendf(msg, ", info: \""); - - chunk_initlen(&src, check->desc, 0, strlen(check->desc)); - chunk_asciiencode(msg, &src, '"'); - - chunk_appendf(msg, "\""); - } - - if (check->duration >= 0) - chunk_appendf(msg, ", check duration: %ldms", check->duration); - - return msg->str; -} - /* * Set check->status, update check->duration and fill check->result with * an adequate CHK_RES_* value. The new check->health is computed based @@ -312,7 +277,7 @@ static void set_server_check_status(struct check *check, short status, const cha (check->result == CHK_RES_CONDPASS) ? "conditionally ":"", (check->result >= CHK_RES_PASSED) ? "succeeded" : "failed"); - srv_append_status(&trash, s, check_reason_string(check), -1, 0); + srv_append_status(&trash, s, check, -1, 0); chunk_appendf(&trash, ", status: %d/%d %s", (check->health >= check->rise) ? check->health - check->rise + 1 : check->health, @@ -345,7 +310,7 @@ static void check_notify_failure(struct check *check) return; /* We only report a reason for the check if we did not do so previously */ - srv_set_stopped(s, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check_reason_string(check) : NULL); + srv_set_stopped(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL); } /* Marks the check as valid and tries to set its server up, provided @@ -378,7 +343,7 @@ static void check_notify_success(struct check *check) if ((check->state & CHK_ST_AGENT) && s->next_state == SRV_ST_STOPPING) return; - srv_set_running(s, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check_reason_string(check) : NULL); + srv_set_running(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL); } /* Marks the check as valid and tries to set its server into stopping mode @@ -406,7 +371,7 @@ static void check_notify_stopping(struct check *check) if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise)) return; - srv_set_stopping(s, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check_reason_string(check) : NULL); + srv_set_running(s, NULL, (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS)) ? check : NULL); } /* note: use health_adjust() only, which first checks that the observe mode is diff --git a/src/hlua_fcn.c b/src/hlua_fcn.c index bc6bc914df..2ae1bbb29e 100644 --- a/src/hlua_fcn.c +++ b/src/hlua_fcn.c @@ -685,7 +685,7 @@ int hlua_server_check_force_up(lua_State *L) sv = hlua_check_server(L, 1); if (!(sv->track)) { sv->check.health = sv->check.rise + sv->check.fall - 1; - srv_set_running(sv, "changed from Lua script"); + srv_set_running(sv, "changed from Lua script", NULL); } return 0; } @@ -697,7 +697,7 @@ int hlua_server_check_force_nolb(lua_State *L) sv = hlua_check_server(L, 1); if (!(sv->track)) { sv->check.health = sv->check.rise + sv->check.fall - 1; - srv_set_stopping(sv, "changed from Lua script"); + srv_set_stopping(sv, "changed from Lua script", NULL); } return 0; } @@ -709,7 +709,7 @@ int hlua_server_check_force_down(lua_State *L) sv = hlua_check_server(L, 1); if (!(sv->track)) { sv->check.health = 0; - srv_set_stopped(sv, "changed from Lua script"); + srv_set_stopped(sv, "changed from Lua script", NULL); } return 0; } @@ -743,7 +743,7 @@ int hlua_server_agent_force_up(lua_State *L) sv = hlua_check_server(L, 1); if (sv->agent.state & CHK_ST_ENABLED) { sv->agent.health = sv->agent.rise + sv->agent.fall - 1; - srv_set_running(sv, "changed from Lua script"); + srv_set_running(sv, "changed from Lua script", NULL); } return 0; } @@ -755,7 +755,7 @@ int hlua_server_agent_force_down(lua_State *L) sv = hlua_check_server(L, 1); if (sv->agent.state & CHK_ST_ENABLED) { sv->agent.health = 0; - srv_set_stopped(sv, "changed from Lua script"); + srv_set_stopped(sv, "changed from Lua script", NULL); } return 0; } diff --git a/src/server.c b/src/server.c index b937c1972f..f57434a293 100644 --- a/src/server.c +++ b/src/server.c @@ -782,17 +782,53 @@ void srv_shutdown_backup_streams(struct proxy *px, int why) /* Appends some information to a message string related to a server going UP or * DOWN. If both and are null and the server tracks another * one, a "via" information will be provided to know where the status came from. - * If is non-null, the entire string will be appended after a comma and - * a space (eg: to report some information from the check that changed the state). - * If is non-negative, some information about requeued streams are + * If is non-null, an entire string describing the check result will be + * appended after a comma and a space (eg: to report some information from the + * check that changed the state). In the other case, the string will be built + * using the check results stored into the struct server if present. + * If is non-negative, some information about requeued sessions are * provided. */ -void srv_append_status(struct chunk *msg, struct server *s, const char *reason, int xferred, int forced) +void srv_append_status(struct chunk *msg, struct server *s, struct check *check, int xferred, int forced) { - if (reason) - chunk_appendf(msg, ", %s", reason); - else if (!forced && s->track) + short status = s->op_st_chg.status; + short code = s->op_st_chg.code; + long duration = s->op_st_chg.duration; + char *desc = s->op_st_chg.reason; + + if (check) { + status = check->status; + code = check->code; + duration = check->duration; + desc = check->desc; + } + + if (status != -1) { + chunk_appendf(msg, ", reason: %s", get_check_status_description(status)); + + if (status >= HCHK_STATUS_L57DATA) + chunk_appendf(msg, ", code: %d", code); + + if (desc && *desc) { + struct chunk src; + + chunk_appendf(msg, ", info: \""); + + chunk_initlen(&src, desc, 0, strlen(desc)); + chunk_asciiencode(msg, &src, '"'); + + chunk_appendf(msg, "\""); + } + + if (duration >= 0) + chunk_appendf(msg, ", check duration: %ldms", duration); + } + else if (desc && *desc) { + chunk_appendf(msg, ", %s", desc); + } + else if (!forced && s->track) { chunk_appendf(msg, " via %s/%s", s->track->proxy->id, s->track->id); + } if (xferred >= 0) { if (s->next_state == SRV_ST_STOPPED) @@ -810,15 +846,14 @@ void srv_append_status(struct chunk *msg, struct server *s, const char *reason, } } -/* Marks server down, regardless of its checks' statuses, notifies by all - * available means, recounts the remaining servers on the proxy and transfers - * queued streams whenever possible to other servers. It automatically - * recomputes the number of servers, but not the map. Maintenance servers are - * ignored. It reports if non-null as the reason for going down. Note - * that it makes use of the trash to build the log strings, so must - * not be placed there. +/* Marks server down, regardless of its checks' statuses. The server is + * registered in a list to postpone the counting of the remaining servers on + * the proxy and transfers queued streams whenever possible to other servers at + * a sync point. Maintenance servers are ignored. It stores the if + * non-null as the reason for going down or the available data from the check + * struct to recompute this reason later. */ -void srv_set_stopped(struct server *s, const char *reason) +void srv_set_stopped(struct server *s, const char *reason, struct check *check) { struct server *srv; @@ -826,26 +861,35 @@ void srv_set_stopped(struct server *s, const char *reason) return; s->next_state = SRV_ST_STOPPED; - if (reason) - strlcpy2(s->op_st_chg_reason, reason, sizeof(s->op_st_chg_reason)); + *s->op_st_chg.reason = 0; + s->op_st_chg.status = -1; + if (reason) { + strlcpy2(s->op_st_chg.reason, reason, sizeof(s->op_st_chg.reason)); + } + else if (check) { + if (check->desc) + strlcpy2(s->op_st_chg.reason, check->desc, sizeof(s->op_st_chg.reason)); + s->op_st_chg.code = check->code; + s->op_st_chg.status = check->status; + s->op_st_chg.duration = check->duration; + } /* Register changes to be applied asynchronously */ if (LIST_ISEMPTY(&s->update_status)) LIST_ADDQ(&updated_servers, &s->update_status); for (srv = s->trackers; srv; srv = srv->tracknext) - srv_set_stopped(srv, NULL); + srv_set_stopped(srv, NULL, NULL); } /* Marks server up regardless of its checks' statuses and provided it isn't - * in maintenance. Notifies by all available means, recounts the remaining - * servers on the proxy and tries to grab requests from the proxy. It - * automatically recomputes the number of servers, but not the map. Maintenance - * servers are ignored. It reports if non-null as the reason for going - * up. Note that it makes use of the trash to build the log strings, so - * must not be placed there. + * in maintenance. The server is registered in a list to postpone the counting + * of the remaining servers on the proxy and tries to grab requests from the + * proxy at a sync point. Maintenance servers are ignored. It stores the + * if non-null as the reason for going down or the available data + * from the check struct to recompute this reason later. */ -void srv_set_running(struct server *s, const char *reason) +void srv_set_running(struct server *s, const char *reason, struct check *check) { struct server *srv; @@ -856,8 +900,18 @@ void srv_set_running(struct server *s, const char *reason) return; s->next_state = SRV_ST_STARTING; - if (reason) - strlcpy2(s->op_st_chg_reason, reason, sizeof(s->op_st_chg_reason)); + *s->op_st_chg.reason = 0; + s->op_st_chg.status = -1; + if (reason) { + strlcpy2(s->op_st_chg.reason, reason, sizeof(s->op_st_chg.reason)); + } + else if (check) { + if (check->desc) + strlcpy2(s->op_st_chg.reason, check->desc, sizeof(s->op_st_chg.reason)); + s->op_st_chg.code = check->code; + s->op_st_chg.status = check->status; + s->op_st_chg.duration = check->duration; + } if (s->slowstart <= 0) s->next_state = SRV_ST_RUNNING; @@ -867,18 +921,19 @@ void srv_set_running(struct server *s, const char *reason) LIST_ADDQ(&updated_servers, &s->update_status); for (srv = s->trackers; srv; srv = srv->tracknext) - srv_set_running(srv, NULL); + srv_set_running(srv, NULL, NULL); } /* Marks server stopping regardless of its checks' statuses and provided it - * isn't in maintenance. Notifies by all available means, recounts the remaining - * servers on the proxy and tries to grab requests from the proxy. It - * automatically recomputes the number of servers, but not the map. Maintenance - * servers are ignored. It reports if non-null as the reason for going + * isn't in maintenance. The server is registered in a list to postpone the + * counting of the remaining servers on the proxy and tries to grab requests + * from the proxy. Maintenance servers are ignored. It stores the + * if non-null as the reason for going down or the available data + * from the check struct to recompute this reason later. * up. Note that it makes use of the trash to build the log strings, so * must not be placed there. */ -void srv_set_stopping(struct server *s, const char *reason) +void srv_set_stopping(struct server *s, const char *reason, struct check *check) { struct server *srv; @@ -889,15 +944,25 @@ void srv_set_stopping(struct server *s, const char *reason) return; s->next_state = SRV_ST_STOPPING; - if (reason) - strlcpy2(s->op_st_chg_reason, reason, sizeof(s->op_st_chg_reason)); + *s->op_st_chg.reason = 0; + s->op_st_chg.status = -1; + if (reason) { + strlcpy2(s->op_st_chg.reason, reason, sizeof(s->op_st_chg.reason)); + } + else if (check) { + if (check->desc) + strlcpy2(s->op_st_chg.reason, check->desc, sizeof(s->op_st_chg.reason)); + s->op_st_chg.code = check->code; + s->op_st_chg.status = check->status; + s->op_st_chg.duration = check->duration; + } /* Register changes to be applied asynchronously */ if (LIST_ISEMPTY(&s->update_status)) LIST_ADDQ(&updated_servers, &s->update_status); for (srv = s->trackers; srv; srv = srv->tracknext) - srv_set_stopping(srv, NULL); + srv_set_stopping(srv, NULL, NULL); } /* Enables admin flag (among SRV_ADMF_*) on server . This is used to @@ -2766,18 +2831,18 @@ static void srv_update_state(struct server *srv, int version, char **params) switch (srv_op_state) { case SRV_ST_STOPPED: srv->check.health = 0; - srv_set_stopped(srv, "changed from server-state after a reload"); + srv_set_stopped(srv, "changed from server-state after a reload", NULL); break; case SRV_ST_STARTING: srv->next_state = srv_op_state; break; case SRV_ST_STOPPING: srv->check.health = srv->check.rise + srv->check.fall - 1; - srv_set_stopping(srv, "changed from server-state after a reload"); + srv_set_stopping(srv, "changed from server-state after a reload", NULL); break; case SRV_ST_RUNNING: srv->check.health = srv->check.rise + srv->check.fall - 1; - srv_set_running(srv, ""); + srv_set_running(srv, "", NULL); break; } @@ -4066,15 +4131,15 @@ static int cli_parse_set_server(char **args, struct appctx *appctx, void *privat } else if (strcmp(args[4], "up") == 0) { sv->check.health = sv->check.rise + sv->check.fall - 1; - srv_set_running(sv, "changed from CLI"); + srv_set_running(sv, "changed from CLI", NULL); } else if (strcmp(args[4], "stopping") == 0) { sv->check.health = sv->check.rise + sv->check.fall - 1; - srv_set_stopping(sv, "changed from CLI"); + srv_set_stopping(sv, "changed from CLI", NULL); } else if (strcmp(args[4], "down") == 0) { sv->check.health = 0; - srv_set_stopped(sv, "changed from CLI"); + srv_set_stopped(sv, "changed from CLI", NULL); } else { appctx->ctx.cli.severity = LOG_ERR; @@ -4090,11 +4155,11 @@ static int cli_parse_set_server(char **args, struct appctx *appctx, void *privat } else if (strcmp(args[4], "up") == 0) { sv->agent.health = sv->agent.rise + sv->agent.fall - 1; - srv_set_running(sv, "changed from CLI"); + srv_set_running(sv, "changed from CLI", NULL); } else if (strcmp(args[4], "down") == 0) { sv->agent.health = 0; - srv_set_stopped(sv, "changed from CLI"); + srv_set_stopped(sv, "changed from CLI", NULL); } else { appctx->ctx.cli.severity = LOG_ERR; @@ -4407,7 +4472,6 @@ static void __server_init(void) cli_register_kw(&cli_kws); } - /* * This function applies server's status changes, it is * is designed to be called asynchronously. @@ -4454,7 +4518,7 @@ void srv_update_status(struct server *s) "%sServer %s/%s is DOWN", s->flags & SRV_F_BACKUP ? "Backup " : "", s->proxy->id, s->id); - srv_append_status(tmptrash, s, *s->op_st_chg_reason ? s->op_st_chg_reason : NULL, xferred, 0); + srv_append_status(tmptrash, s, NULL, xferred, 0); Warning("%s.\n", tmptrash->str); /* we don't send an alert if the server was previously paused */ @@ -4486,7 +4550,7 @@ void srv_update_status(struct server *s) "%sServer %s/%s is stopping", s->flags & SRV_F_BACKUP ? "Backup " : "", s->proxy->id, s->id); - srv_append_status(tmptrash, s, *s->op_st_chg_reason ? s->op_st_chg_reason : NULL, xferred, 0); + srv_append_status(tmptrash, s, NULL, xferred, 0); Warning("%s.\n", tmptrash->str); send_log(s->proxy, LOG_NOTICE, "%s.\n", tmptrash->str); @@ -4545,7 +4609,7 @@ void srv_update_status(struct server *s) "%sServer %s/%s is UP", s->flags & SRV_F_BACKUP ? "Backup " : "", s->proxy->id, s->id); - srv_append_status(tmptrash, s, *s->op_st_chg_reason ? s->op_st_chg_reason : NULL, xferred, 0); + srv_append_status(tmptrash, s, NULL, xferred, 0); Warning("%s.\n", tmptrash->str); send_log(s->proxy, LOG_NOTICE, "%s.\n", tmptrash->str); send_email_alert(s, LOG_NOTICE, "%s", tmptrash->str); @@ -4576,6 +4640,10 @@ void srv_update_status(struct server *s) s->next_admin = next_admin; } + /* reset operational state change */ + *s->op_st_chg.reason = 0; + s->op_st_chg.status = s->op_st_chg.code = -1; + s->op_st_chg.duration = 0; /* Now we try to apply pending admin changes */ @@ -4900,7 +4968,6 @@ void srv_update_status(struct server *s) } /* Re-set log strings to empty */ - *s->op_st_chg_reason = 0; *s->adm_st_chg_cause = 0; } /* diff --git a/src/stats.c b/src/stats.c index 614350d562..66e9be4513 100644 --- a/src/stats.c +++ b/src/stats.c @@ -2810,7 +2810,7 @@ static int stats_process_http_post(struct stream_interface *si) case ST_ADM_ACTION_HRUNN: if (!(sv->track)) { sv->check.health = sv->check.rise + sv->check.fall - 1; - srv_set_running(sv, "changed from Web interface"); + srv_set_running(sv, "changed from Web interface", NULL); altered_servers++; total_servers++; } @@ -2818,7 +2818,7 @@ static int stats_process_http_post(struct stream_interface *si) case ST_ADM_ACTION_HNOLB: if (!(sv->track)) { sv->check.health = sv->check.rise + sv->check.fall - 1; - srv_set_stopping(sv, "changed from Web interface"); + srv_set_stopping(sv, "changed from Web interface", NULL); altered_servers++; total_servers++; } @@ -2826,7 +2826,7 @@ static int stats_process_http_post(struct stream_interface *si) case ST_ADM_ACTION_HDOWN: if (!(sv->track)) { sv->check.health = 0; - srv_set_stopped(sv, "changed from Web interface"); + srv_set_stopped(sv, "changed from Web interface", NULL); altered_servers++; total_servers++; } @@ -2848,7 +2848,7 @@ static int stats_process_http_post(struct stream_interface *si) case ST_ADM_ACTION_ARUNN: if (sv->agent.state & CHK_ST_ENABLED) { sv->agent.health = sv->agent.rise + sv->agent.fall - 1; - srv_set_running(sv, "changed from Web interface"); + srv_set_running(sv, "changed from Web interface", NULL); altered_servers++; total_servers++; } @@ -2856,7 +2856,7 @@ static int stats_process_http_post(struct stream_interface *si) case ST_ADM_ACTION_ADOWN: if (sv->agent.state & CHK_ST_ENABLED) { sv->agent.health = 0; - srv_set_stopped(sv, "changed from Web interface"); + srv_set_stopped(sv, "changed from Web interface", NULL); altered_servers++; total_servers++; }