]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MINOR: contrib/prometheus-exporter: Add health check status/code in server metrics
authorChristopher Faulet <cfaulet@haproxy.com>
Thu, 21 Nov 2019 13:35:46 +0000 (14:35 +0100)
committerChristopher Faulet <cfaulet@haproxy.com>
Mon, 20 Jan 2020 14:18:45 +0000 (15:18 +0100)
ST_F_CHECK_STATUS and ST_F_CHECK_CODE are now part of exported server metrics:

  * haproxy_server_check_status
  * haproxy_server_check_code

The health check status is an integer corresponding to an HCHK_STATUS_* value.

contrib/prometheus-exporter/README
contrib/prometheus-exporter/service-prometheus.c

index b19acc1bdb10dd40efa816c926a389e6bef849e0..a9bd5e7a39a1677c476d8efeeb29eac01e049158 100644 (file)
@@ -268,6 +268,8 @@ Exported metrics
 | haproxy_server_client_aborts_total                 | Total number of data transfers aborted by the client.                     |
 | haproxy_server_server_aborts_total                 | Total number of data transfers aborted by the server.                     |
 | haproxy_server_weight                              | Service weight.                                                           |
+| haproxy_server_check_status                        | Status of last health check, if enabled. (see below for the mapping)      |
+| haproxy_server_check_code                          | layer5-7 code, if available of the last health check.                     |
 | haproxy_server_check_failures_total                | Total number of failed check (Only when the server is up).                |
 | haproxy_server_check_up_down_total                 | Total number of UP->DOWN transitions.                                     |
 | haproxy_server_downtime_seconds_total              | Total downtime (in seconds) for the service.                              |
@@ -278,3 +280,30 @@ Exported metrics
 | haproxy_server_idle_connections_current            | Current number of idle connections available for reuse.                   |
 | haproxy_server_idle_connections_limit              | Limit on the number of available idle connections.                        |
 +----------------------------------------------------+---------------------------------------------------------------------------+
+
+Mapping of health check status :
+
+   0 : HCHK_STATUS_UNKNOWN  (Unknown)
+   1 : HCHK_STATUS_INI      (Initializing)
+
+   4 : HCHK_STATUS_HANA     (Health analyze detected enough consecutive errors)
+
+   5 : HCHK_STATUS_SOCKERR  (Socket error)
+
+   6 : HCHK_STATUS_L4OK     (L4 check passed, for example tcp connect)
+   7 : HCHK_STATUS_L4TOUT   (L4 timeout)
+   8 : HCHK_STATUS_L4CON    (L4 connection problem)
+
+   9 : HCHK_STATUS_L6OK     (L6 check passed)
+  10 : HCHK_STATUS_L6TOUT   (L6 (SSL) timeout)
+  11 : HCHK_STATUS_L6RSP    (L6 invalid response - protocol error)
+
+  12 : HCHK_STATUS_L7TOUT   (L7 (HTTP/SMTP) timeout)
+  13 : HCHK_STATUS_L7RSP    (L7 invalid response - protocol error)
+  15 : HCHK_STATUS_L7OKD    (L7 check passed)
+  16 : HCHK_STATUS_L7OKCD   (L7 check conditionally passed)
+  17 : HCHK_STATUS_L7STS    (L7 response error, for example HTTP 5xx)
+
+  18 : HCHK_STATUS_PROCERR  (External process check failure)
+  19 : HCHK_STATUS_PROCTOUT (External process check timeout)
+  20 : HCHK_STATUS_PROCOK   (External process check passed)
index 0f178eb64e2a376d2b56ac7ce3a71d88438246e7..4cf216a23aa7dfe5d5d257700af564ab6a1de794 100644 (file)
@@ -367,7 +367,7 @@ const int promex_srv_metrics[ST_F_TOTAL_FIELDS] = {
        [ST_F_WRETR]          = ST_F_WREDIS,
        [ST_F_WREDIS]         = ST_F_WREW,
        [ST_F_STATUS]         = ST_F_SCUR,
-       [ST_F_WEIGHT]         = ST_F_CHKFAIL,
+       [ST_F_WEIGHT]         = ST_F_CHECK_STATUS,
        [ST_F_ACT]            = 0,
        [ST_F_BCK]            = 0,
        [ST_F_CHKFAIL]        = ST_F_CHKDOWN,
@@ -385,8 +385,8 @@ const int promex_srv_metrics[ST_F_TOTAL_FIELDS] = {
        [ST_F_RATE]           = 0,
        [ST_F_RATE_LIM]       = 0,
        [ST_F_RATE_MAX]       = ST_F_LASTSESS,
-       [ST_F_CHECK_STATUS]   = 0,
-       [ST_F_CHECK_CODE]     = 0,
+       [ST_F_CHECK_STATUS]   = ST_F_CHECK_CODE,
+       [ST_F_CHECK_CODE]     = ST_F_CHKFAIL,
        [ST_F_CHECK_DURATION] = 0,
        [ST_F_HRSP_1XX]       = ST_F_HRSP_2XX,
        [ST_F_HRSP_2XX]       = ST_F_HRSP_3XX,
@@ -709,7 +709,7 @@ const struct ist promex_st_metric_desc[ST_F_TOTAL_FIELDS] = {
        [ST_F_RATE]           = IST("Current number of sessions per second over last elapsed second."),
        [ST_F_RATE_LIM]       = IST("Configured limit on new sessions per second."),
        [ST_F_RATE_MAX]       = IST("Maximum observed number of sessions per second."),
-       [ST_F_CHECK_STATUS]   = IST("Status of last health check (If a check is running, the status will be reported, prefixed with '* ')."),
+       [ST_F_CHECK_STATUS]   = IST("Status of last health check (HCHK_STATUS_* values)."),
        [ST_F_CHECK_CODE]     = IST("layer5-7 code, if available of the last health check."),
        [ST_F_CHECK_DURATION] = IST("Time in ms took to finish last health check."),
        [ST_F_HRSP_1XX]       = IST("Total number of HTTP responses."),
@@ -1027,8 +1027,8 @@ const struct ist promex_st_metric_types[ST_F_TOTAL_FIELDS] = {
        [ST_F_RATE]           = IST("untyped"),
        [ST_F_RATE_LIM]       = IST("gauge"),
        [ST_F_RATE_MAX]       = IST("gauge"),
-       [ST_F_CHECK_STATUS]   = IST("untyped"),
-       [ST_F_CHECK_CODE]     = IST("untyped"),
+       [ST_F_CHECK_STATUS]   = IST("gauge"),
+       [ST_F_CHECK_CODE]     = IST("gauge"),
        [ST_F_CHECK_DURATION] = IST("gauge"),
        [ST_F_HRSP_1XX]       = IST("counter"),
        [ST_F_HRSP_2XX]       = IST("counter"),
@@ -2012,6 +2012,16 @@ static int promex_dump_srv_metrics(struct appctx *appctx, struct htx *htx)
                                                weight = (sv->cur_eweight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv;
                                                metric = mkf_u32(FN_AVG, weight);
                                                break;
+                                       case ST_F_CHECK_STATUS:
+                                               if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) != CHK_ST_ENABLED)
+                                                       goto next_sv;
+                                               metric = mkf_u32(FN_OUTPUT, sv->check.status);
+                                               break;
+                                       case ST_F_CHECK_CODE:
+                                               if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) != CHK_ST_ENABLED)
+                                                       goto next_sv;
+                                               metric = mkf_u32(FN_OUTPUT, (sv->check.status < HCHK_STATUS_L57DATA) ? 0 : sv->check.code);
+                                               break;
                                        case ST_F_CHKFAIL:
                                                metric = mkf_u64(FN_COUNTER, sv->counters.failed_checks);
                                                break;