]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MINOR: contrib/prometheus-exporter: Add the last healthcheck duration metric
author Christopher Faulet <cfaulet@haproxy.com>
Thu, 27 Feb 2020 15:12:07 +0000 (16:12 +0100)
committer Christopher Faulet <cfaulet@haproxy.com>
Fri, 28 Feb 2020 09:49:09 +0000 (10:49 +0100)
ST_F_CHECK_DURATION is now part of exported server metrics, named
haproxy_server_check_duration_seconds and expressed in seconds. For a given
server, this value is exported only if the healthcheck is finished (the status
is greater than or equal to HCHK_STATUS_CHECKED).

This patch fixes issue #519. It may be backported as far as 2.0.

contrib/prometheus-exporter/README
contrib/prometheus-exporter/service-prometheus.c

index a0df4b21de44888a7b017094c6e36a3125bee23c..a63102028afaffb386c7bea9a5fd6dd3c0b91c6b 100644 (file)
@@ -272,6 +272,7 @@ Exported metrics
 | haproxy_server_weight                              | Service weight.                                                           |
 | haproxy_server_check_status                        | Status of last health check, if enabled. (see below for the mapping)      |
 | haproxy_server_check_code                          | layer5-7 code, if available of the last health check.                     |
+| haproxy_server_check_duration_seconds              | Total duration of the latest server health check, in seconds.             |
 | haproxy_server_check_failures_total                | Total number of failed check (Only when the server is up).                |
 | haproxy_server_check_up_down_total                 | Total number of UP->DOWN transitions.                                     |
 | haproxy_server_downtime_seconds_total              | Total downtime (in seconds) for the service.                              |
index fa80147865f09cb80a83e6150845c84b977ea1f5..6e7eca04bd25b7b9348ac575d898bdf42e91a70b 100644 (file)
@@ -388,8 +388,8 @@ const int promex_srv_metrics[ST_F_TOTAL_FIELDS] = {
        [ST_F_RATE_LIM]       = 0,
        [ST_F_RATE_MAX]       = ST_F_LASTSESS,
        [ST_F_CHECK_STATUS]   = ST_F_CHECK_CODE,
-       [ST_F_CHECK_CODE]     = ST_F_CHKFAIL,
-       [ST_F_CHECK_DURATION] = 0,
+       [ST_F_CHECK_CODE]     = ST_F_CHECK_DURATION,
+       [ST_F_CHECK_DURATION] = ST_F_CHKFAIL,
        [ST_F_HRSP_1XX]       = ST_F_HRSP_2XX,
        [ST_F_HRSP_2XX]       = ST_F_HRSP_3XX,
        [ST_F_HRSP_3XX]       = ST_F_HRSP_4XX,
@@ -552,7 +552,7 @@ const struct ist promex_st_metric_names[ST_F_TOTAL_FIELDS] = {
        [ST_F_RATE_MAX]       = IST("max_session_rate"),
        [ST_F_CHECK_STATUS]   = IST("check_status"),
        [ST_F_CHECK_CODE]     = IST("check_code"),
-       [ST_F_CHECK_DURATION] = IST("check_duration_milliseconds"),
+       [ST_F_CHECK_DURATION] = IST("check_duration_seconds"),
        [ST_F_HRSP_1XX]       = IST("http_responses_total"),
        [ST_F_HRSP_2XX]       = IST("http_responses_total"),
        [ST_F_HRSP_3XX]       = IST("http_responses_total"),
@@ -715,7 +715,7 @@ const struct ist promex_st_metric_desc[ST_F_TOTAL_FIELDS] = {
        [ST_F_RATE_MAX]       = IST("Maximum observed number of sessions per second."),
        [ST_F_CHECK_STATUS]   = IST("Status of last health check (HCHK_STATUS_* values)."),
        [ST_F_CHECK_CODE]     = IST("layer5-7 code, if available of the last health check."),
-       [ST_F_CHECK_DURATION] = IST("Time in ms took to finish last health check."),
+       [ST_F_CHECK_DURATION] = IST("Total duration of the latest server health check, in seconds."),
        [ST_F_HRSP_1XX]       = IST("Total number of HTTP responses."),
        [ST_F_HRSP_2XX]       = IST("Total number of HTTP responses."),
        [ST_F_HRSP_3XX]       = IST("Total number of HTTP responses."),
@@ -2037,6 +2037,12 @@ static int promex_dump_srv_metrics(struct appctx *appctx, struct htx *htx)
                                                        goto next_sv;
                                                metric = mkf_u32(FN_OUTPUT, (sv->check.status < HCHK_STATUS_L57DATA) ? 0 : sv->check.code);
                                                break;
+                                       case ST_F_CHECK_DURATION:
+                                               if (sv->check.status < HCHK_STATUS_CHECKED)
+                                                   goto next_sv;
+                                               secs = (double)sv->check.duration / 1000.0;
+                                               metric = mkf_flt(FN_DURATION, secs);
+                                               break;
                                        case ST_F_CHKFAIL:
                                                metric = mkf_u64(FN_COUNTER, sv->counters.failed_checks);
                                                break;