From: Christopher Faulet Date: Thu, 27 Feb 2020 15:12:07 +0000 (+0100) Subject: MINOR: contrib/prometheus-exporter: Add the last healthcheck duration metric X-Git-Tag: v2.2-dev4~108 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2711e51016cfc408deb03e66f46bea0fa5bf954c;p=thirdparty%2Fhaproxy.git MINOR: contrib/prometheus-exporter: Add the last healthcheck duration metric ST_F_CHECK_DURATION is now part of exported server metrics, named haproxy_server_check_duration_seconds and expressed in seconds. For a given server, this value is exported only if the healthcheck is finished (the status is greater than or equal to HCHK_STATUS_CHECKED). This patch fixes the issue #519. It may be backported as far as 2.0. --- diff --git a/contrib/prometheus-exporter/README b/contrib/prometheus-exporter/README index a0df4b21de..a63102028a 100644 --- a/contrib/prometheus-exporter/README +++ b/contrib/prometheus-exporter/README @@ -272,6 +272,7 @@ Exported metrics | haproxy_server_weight | Service weight. | | haproxy_server_check_status | Status of last health check, if enabled. (see below for the mapping) | | haproxy_server_check_code | layer5-7 code, if available of the last health check. | +| haproxy_server_check_duration_seconds | Total duration of the latest server health check, in seconds. | | haproxy_server_check_failures_total | Total number of failed check (Only when the server is up). | | haproxy_server_check_up_down_total | Total number of UP->DOWN transitions. | | haproxy_server_downtime_seconds_total | Total downtime (in seconds) for the service. 
| diff --git a/contrib/prometheus-exporter/service-prometheus.c b/contrib/prometheus-exporter/service-prometheus.c index fa80147865..6e7eca04bd 100644 --- a/contrib/prometheus-exporter/service-prometheus.c +++ b/contrib/prometheus-exporter/service-prometheus.c @@ -388,8 +388,8 @@ const int promex_srv_metrics[ST_F_TOTAL_FIELDS] = { [ST_F_RATE_LIM] = 0, [ST_F_RATE_MAX] = ST_F_LASTSESS, [ST_F_CHECK_STATUS] = ST_F_CHECK_CODE, - [ST_F_CHECK_CODE] = ST_F_CHKFAIL, - [ST_F_CHECK_DURATION] = 0, + [ST_F_CHECK_CODE] = ST_F_CHECK_DURATION, + [ST_F_CHECK_DURATION] = ST_F_CHKFAIL, [ST_F_HRSP_1XX] = ST_F_HRSP_2XX, [ST_F_HRSP_2XX] = ST_F_HRSP_3XX, [ST_F_HRSP_3XX] = ST_F_HRSP_4XX, @@ -552,7 +552,7 @@ const struct ist promex_st_metric_names[ST_F_TOTAL_FIELDS] = { [ST_F_RATE_MAX] = IST("max_session_rate"), [ST_F_CHECK_STATUS] = IST("check_status"), [ST_F_CHECK_CODE] = IST("check_code"), - [ST_F_CHECK_DURATION] = IST("check_duration_milliseconds"), + [ST_F_CHECK_DURATION] = IST("check_duration_seconds"), [ST_F_HRSP_1XX] = IST("http_responses_total"), [ST_F_HRSP_2XX] = IST("http_responses_total"), [ST_F_HRSP_3XX] = IST("http_responses_total"), @@ -715,7 +715,7 @@ const struct ist promex_st_metric_desc[ST_F_TOTAL_FIELDS] = { [ST_F_RATE_MAX] = IST("Maximum observed number of sessions per second."), [ST_F_CHECK_STATUS] = IST("Status of last health check (HCHK_STATUS_* values)."), [ST_F_CHECK_CODE] = IST("layer5-7 code, if available of the last health check."), - [ST_F_CHECK_DURATION] = IST("Time in ms took to finish last health check."), + [ST_F_CHECK_DURATION] = IST("Total duration of the latest server health check, in seconds."), [ST_F_HRSP_1XX] = IST("Total number of HTTP responses."), [ST_F_HRSP_2XX] = IST("Total number of HTTP responses."), [ST_F_HRSP_3XX] = IST("Total number of HTTP responses."), @@ -2037,6 +2037,12 @@ static int promex_dump_srv_metrics(struct appctx *appctx, struct htx *htx) goto next_sv; metric = mkf_u32(FN_OUTPUT, (sv->check.status < HCHK_STATUS_L57DATA) ? 
0 : sv->check.code); break; + case ST_F_CHECK_DURATION: + if (sv->check.status < HCHK_STATUS_CHECKED) + goto next_sv; + secs = (double)sv->check.duration / 1000.0; + metric = mkf_flt(FN_DURATION, secs); + break; case ST_F_CHKFAIL: metric = mkf_u64(FN_COUNTER, sv->counters.failed_checks); break;