]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
[MEDIUM] Decrease server health based on http responses / events, version 3
authorKrzysztof Piotr Oledzki <ole@ans.pl>
Tue, 15 Dec 2009 21:31:24 +0000 (22:31 +0100)
committerWilly Tarreau <w@1wt.eu>
Tue, 15 Dec 2009 23:29:27 +0000 (00:29 +0100)
Implement decreasing health based on observing communication between
HAProxy and servers.

Changes in this version 2:
 - documentation
 - close race between a started check and health analysis event
 - don't force fastinter if it is not set
 - better names for options
 - layer4 support

Changes in this version 3:
 - add stats
 - port to the current 1.4 tree

12 files changed:
doc/configuration.txt
include/common/defaults.h
include/proto/checks.h
include/types/checks.h
include/types/counters.h
include/types/server.h
src/cfgparse.c
src/checks.c
src/dumpstats.c
src/proto_http.c
src/proto_tcp.c
src/session.c

index 87c34429d36b096d64c0207f19656c6a8e761deb..4cafff8a1a46bd6220e7ccd231bb0f9379ed4b39 100644 (file)
@@ -4694,6 +4694,13 @@ cookie <value>
   the same cookie value, and it is in fact somewhat common between normal and
   backup servers. See also the "cookie" keyword in backend section.
 
+error-limit <count>
+  If health observing is enabled, the "error-limit" parameter specifies the number
+  of consecutive errors that triggers the event selected by the "on-error" option.
+  By default it is set to 10 consecutive errors.
+
+  See also the "check", "observe" and "on-error".
+
 fall <count>
   The "fall" parameter states that a server will be considered as dead after
   <count> consecutive unsuccessful health checks. This value defaults to 3 if
@@ -4761,7 +4768,29 @@ minconn <minconn>
   server during normal loads, but push it further for important loads without
   overloading the server during exceptional loads. See also the "maxconn"
   and "maxqueue" parameters, as well as the "fullconn" backend keyword.
-  
+
+observe <mode>
+  This option enables health adjusting based on observing communication with
+  the server. By default this functionality is disabled, and enabling it also
+  requires health checks to be enabled. There are two supported modes: "layer4" and
+  "layer7". In layer4 mode, only successful/unsuccessful tcp connections are
+  significant. In layer7, which is only allowed for http proxies, responses
+  received from server are verified, like valid/wrong http code, unparsable
+  headers, a timeout, etc.
+
+  See also the "check", "on-error" and "error-limit".
+
+on-error <mode>
+  Select what should happen when enough consecutive errors are detected.
+  Currently, four modes are available:
+  - fastinter: force fastinter
+  - fail-check: simulate a failed check, also forces fastinter (default)
+  - sudden-death: simulate a pre-fatal failed health check, one more failed
+    check will mark a server down, forces fastinter
+  - mark-down: mark the server immediately down and force fastinter
+
+  See also the "check", "observe" and "error-limit".
+
 port <port>
   Using the "port" parameter, it becomes possible to use a different port to
   send health-checks. On some servers, it may be desirable to dedicate a port
index b0aee865d5af07af04e71f7a7861c023502e123f..cada729c3dfad68e96ee02abcfd0a449de6b2f28 100644 (file)
 #define DEF_CHECK_REQ   "OPTIONS / HTTP/1.0\r\n\r\n"
 #define DEF_SMTP_CHECK_REQ   "HELO localhost\r\n"
 
+#define DEF_HANA_ONERR         HANA_ONERR_FAILCHK
+#define DEF_HANA_ERRLIMIT      10
+
 // X-Forwarded-For header default
 #define DEF_XFORWARDFOR_HDR    "X-Forwarded-For"
 
index bd701645d1db2574c9e4f87b1ea1a0bd5a7803f9..e3fe7accd11ec796bd74c30e4d2608d910bb6e28 100644 (file)
@@ -29,6 +29,7 @@ const char *get_check_status_description(short check_status);
 const char *get_check_status_info(short check_status);
 struct task *process_chk(struct task *t);
 int start_checks();
+void health_adjust(struct server *s, short status);
 
 #endif /* _PROTO_CHECKS_H */
 
index 1b046083c6571f1decd350a62654f44d9eb3086d..75e32b6b4b31163b9cb8349d6579ba3a618bc699 100644 (file)
@@ -18,6 +18,9 @@ enum {
 
        /* Below we have finished checks */
        HCHK_STATUS_CHECKED,            /* DUMMY STATUS */
+
+       HCHK_STATUS_HANA,               /* Health analysis detected enough consecutive errors */
+
        HCHK_STATUS_SOCKERR,            /* Socket error */
 
        HCHK_STATUS_L4OK,               /* L4 check passed, for example tcp connect */
@@ -41,8 +44,51 @@ enum {
        HCHK_STATUS_SIZE
 };
 
+
+/* health status for response tracking: the event codes passed to
+ * health_adjust() and used as indexes into analyze_statuses[]
+ */
+enum {
+       HANA_STATUS_UNKNOWN     = 0,
+
+       HANA_STATUS_L4_OK,              /* L4 successful connection */
+       HANA_STATUS_L4_ERR,             /* L4 unsuccessful connection */
+
+       HANA_STATUS_HTTP_OK,            /* Correct http response */
+       HANA_STATUS_HTTP_STS,           /* Wrong http response, for example HTTP 5xx */
+       HANA_STATUS_HTTP_HDRRSP,        /* Invalid http response (headers) */
+       HANA_STATUS_HTTP_RSP,           /* Invalid http response */
+
+       HANA_STATUS_HTTP_READ_ERROR,    /* Read error */
+       HANA_STATUS_HTTP_READ_TIMEOUT,  /* Read timeout */
+       HANA_STATUS_HTTP_BROKEN_PIPE,   /* Unexpected close from server */
+
+       HANA_STATUS_SIZE                /* number of statuses; sizes analyze_statuses[] */
+};
+
+enum {                                 /* action taken by health_adjust() once the error limit is reached */
+       HANA_ONERR_UNKNOWN      = 0,
+
+       HANA_ONERR_FASTINTER,           /* Force fastinter */
+       HANA_ONERR_FAILCHK,             /* Simulate a failed check */
+       HANA_ONERR_SUDDTH,              /* Enter sudden death - one more failed check will mark this server down */
+       HANA_ONERR_MARKDWN,             /* Mark this server down, now! */
+};
+
+enum {                                 /* observing mode selected by the "observe" server keyword */
+       HANA_OBS_NONE           = 0,    /* observing disabled: health_adjust() returns immediately */
+
+       HANA_OBS_LAYER4,                /* Observe L4 - for example tcp */
+       HANA_OBS_LAYER7,                /* Observe L7 - for example http */
+
+       HANA_OBS_SIZE                   /* number of modes; sizes analyze_status.lr[] */
+};
+
 struct check_status {                  /* one entry of check_statuses[], indexed by HCHK_STATUS_* */
        short result;                   /* one of SRV_CHK_* */
        char *info;                     /* human readable short info */
        char *desc;                     /* long description */
 };
+
+struct analyze_status {                        /* one entry of analyze_statuses[], indexed by HANA_STATUS_* */
+       char *desc;                             /* description */
+       unsigned char lr[HANA_OBS_SIZE];        /* result for l4/l7: 0 = ignore, 1 = error, 2 = OK */
+};
index f551bf07aba73e3dfecf374fa3939bdbbc7bd5b7..fa648a34e60e039559b2470ef6e9daa4852efa06 100644 (file)
@@ -81,7 +81,8 @@ struct srvcounters {
                } http;
        } p;
 
-       long long failed_checks, down_trans;    /* failed checks and up->down transitions */
+       long long failed_checks, failed_hana;   /* failed health checks and health analyses */
+       long long down_trans;                   /* up->down transitions */
 };
 
 #endif /* _TYPES_COUNTERS_H */
index 935992d40cc75c06dd047b2f439775375421bfde..745a94f9ff33b24563be24890a0bbcbc46ff5c39 100644 (file)
@@ -115,7 +115,10 @@ struct server {
        struct sockaddr_in check_addr;          /* the address to check, if different from <addr> */
        short check_port;                       /* the port to use for the health checks */
        int health;                             /* 0->rise-1 = bad; rise->rise+fall-1 = good */
+       int consecutive_errors;                 /* current number of consecutive errors */
        int rise, fall;                         /* time in iterations */
+       int consecutive_errors_limit;           /* number of consecutive errors that triggers an event */
+       short observe, onerror;                 /* observing mode: one of HANA_OBS_*; what to do on error: one of HANA_ONERR_* */
        int inter, fastinter, downinter;        /* checks: time in milliseconds */
        int slowstart;                          /* slowstart time in seconds (ms in the conf) */
        int result;                             /* health-check result : SRV_CHK_* */
@@ -137,9 +140,9 @@ struct server {
        unsigned down_time;                     /* total time the server was down */
        time_t last_change;                     /* last time, when the state was changed */
        struct timeval check_start;             /* last health check start time */
-       unsigned long check_duration;           /* time in ms took to finish last health check */
+       long check_duration;                    /* time in ms took to finish last health check */
        short check_status, check_code;         /* check result, check code */
-       char check_desc[HCHK_DESC_LEN];         /* healt check descritpion */
+       char check_desc[HCHK_DESC_LEN];         /* health check description */
 
        struct freq_ctr sess_per_sec;           /* sessions per second on this server */
        int puid;                               /* proxy-unique server ID, used for SNMP */
index 4ab5e4bfb0d27e5e7ac0ee6546864c071a7ff280..1a2e9b07db69abe4ef85d1ed4a9c398e0051a598 100644 (file)
@@ -2618,6 +2618,8 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int kwm)
                newsrv->uweight = newsrv->iweight = 1;
                newsrv->maxqueue = 0;
                newsrv->slowstart = 0;
+               newsrv->onerror = DEF_HANA_ONERR;
+               newsrv->consecutive_errors_limit = DEF_HANA_ERRLIMIT;
 
                cur_arg = 3;
                while (*args[cur_arg]) {
@@ -2823,6 +2825,65 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int kwm)
                                do_check = 1;
                                cur_arg += 1;
                        }
+                       else if (!strcmp(args[cur_arg], "observe")) {
+                               if (!strcmp(args[cur_arg + 1], "none"))
+                                       newsrv->observe = HANA_OBS_NONE;
+                               else if (!strcmp(args[cur_arg + 1], "layer4"))
+                                       newsrv->observe = HANA_OBS_LAYER4;
+                               else if (!strcmp(args[cur_arg + 1], "layer7")) {
+                                       if (curproxy->mode != PR_MODE_HTTP) {
+                                               Alert("parsing [%s:%d]: '%s' can only be used in http proxies.\n",
+                                                       file, linenum, args[cur_arg + 1]);
+                                               err_code |= ERR_ALERT;
+                                       }
+                                       newsrv->observe = HANA_OBS_LAYER7;
+                               }
+                               else {
+                                       Alert("parsing [%s:%d]: '%s' expects one of 'none', "
+                                               "'l4events', 'http-responses' but get '%s'\n",
+                                               file, linenum, args[cur_arg], args[cur_arg + 1]);
+                                       err_code |= ERR_ALERT | ERR_FATAL;
+                                       goto out;
+                               }
+
+                               cur_arg += 2;
+                       }
+                       else if (!strcmp(args[cur_arg], "on-error")) {
+                               if (!strcmp(args[cur_arg + 1], "fastinter"))
+                                       newsrv->onerror = HANA_ONERR_FASTINTER;
+                               else if (!strcmp(args[cur_arg + 1], "fail-check"))
+                                       newsrv->onerror = HANA_ONERR_FAILCHK;
+                               else if (!strcmp(args[cur_arg + 1], "sudden-death"))
+                                       newsrv->onerror = HANA_ONERR_SUDDTH;
+                               else if (!strcmp(args[cur_arg + 1], "mark-down"))
+                                       newsrv->onerror = HANA_ONERR_MARKDWN;
+                               else {
+                                       Alert("parsing [%s:%d]: '%s' expects one of 'fastinter', "
+                                               "'fail-check', 'sudden-death' or 'mark-down' but get '%s'\n",
+                                               file, linenum, args[cur_arg], args[cur_arg + 1]);
+                                       err_code |= ERR_ALERT | ERR_FATAL;
+                                       goto out;
+                               }
+
+                               cur_arg += 2;
+                       }
+                       else if (!strcmp(args[cur_arg], "error-limit")) {
+                               if (!*args[cur_arg + 1]) {
+                                       Alert("parsing [%s:%d]: '%s' expects an integer argument.\n",
+                                               file, linenum, args[cur_arg]);
+                                       err_code |= ERR_ALERT | ERR_FATAL;
+                                       goto out;
+                               }
+
+                               newsrv->consecutive_errors_limit = atoi(args[cur_arg + 1]);
+
+                               if (newsrv->consecutive_errors_limit <= 0) {
+                                       Alert("parsing [%s:%d]: %s has to be > 0.\n",
+                                               file, linenum, args[cur_arg]);
+                                       err_code |= ERR_ALERT | ERR_FATAL;
+                                       goto out;
+                               }
+                       }
                        else if (!strcmp(args[cur_arg], "source")) {  /* address to which we bind when connecting */
                                int port_low, port_high;
                                if (!*args[cur_arg + 1]) {
index e7aefb49338c3cd3a3d73fe9542c55a7630a4787..388efdf0249af982822d7ad175133a6c880b7917 100644 (file)
@@ -52,6 +52,8 @@ const struct check_status check_statuses[HCHK_STATUS_SIZE] = {
        [HCHK_STATUS_INI]       = { SRV_CHK_UNKNOWN,                   "INI",     "Initializing" },
        [HCHK_STATUS_START]     = { /* SPECIAL STATUS*/ },
 
+       [HCHK_STATUS_HANA]      = { SRV_CHK_ERROR,                     "HANA",    "Health analyze" },
+
        [HCHK_STATUS_SOCKERR]   = { SRV_CHK_ERROR,                     "SOCKERR", "Socket error" },
 
        [HCHK_STATUS_L4OK]      = { SRV_CHK_RUNNING,                   "L4OK",    "Layer4 check passed" },
@@ -72,6 +74,22 @@ const struct check_status check_statuses[HCHK_STATUS_SIZE] = {
        [HCHK_STATUS_L7STS]     = { SRV_CHK_ERROR,                     "L7STS",   "Layer7 wrong status" },
 };
 
+const struct analyze_status analyze_statuses[HANA_STATUS_SIZE] = {             /* lr[] = { layer4, layer7 } verdict, read as lr[observe - 1]; 0: ignore, 1: error, 2: OK */
+       [HANA_STATUS_UNKNOWN]           = { "Unknown",                         { 0, 0 }},
+
+       [HANA_STATUS_L4_OK]             = { "L4 successful connection",        { 2, 0 }},
+       [HANA_STATUS_L4_ERR]            = { "L4 unsuccessful connection",      { 1, 1 }},
+
+       [HANA_STATUS_HTTP_OK]           = { "Correct http response",           { 0, 2 }},
+       [HANA_STATUS_HTTP_STS]          = { "Wrong http response",             { 0, 1 }},
+       [HANA_STATUS_HTTP_HDRRSP]       = { "Invalid http response (headers)", { 0, 1 }},
+       [HANA_STATUS_HTTP_RSP]          = { "Invalid http response",           { 0, 1 }},
+
+       [HANA_STATUS_HTTP_READ_ERROR]   = { "Read error (http)",               { 0, 1 }},
+       [HANA_STATUS_HTTP_READ_TIMEOUT] = { "Read timeout (http)",             { 0, 1 }},
+       [HANA_STATUS_HTTP_BROKEN_PIPE]  = { "Close from server (http)",        { 0, 1 }},
+};
+
 /*
  * Convert check_status code to description
  */
@@ -108,6 +126,21 @@ const char *get_check_status_info(short check_status) {
                return check_statuses[HCHK_STATUS_UNKNOWN].info;
 }
 
+/*
+ * Convert an analyze status code (HANA_STATUS_*) to its description.
+ * A code outside the analyze_statuses[] table, or one whose entry has no
+ * (or an empty) description, yields the HANA_STATUS_UNKNOWN description.
+ */
+const char *get_analyze_status(short analyze_status) {
+
+       const char *desc = NULL;
+
+       /* <analyze_status> is a signed short: reject negative values as well
+        * as too large ones before using it as an array index.
+        */
+       if (analyze_status >= 0 && analyze_status < HANA_STATUS_SIZE)
+               desc = analyze_statuses[analyze_status].desc;
+
+       if (desc && *desc)
+               return desc;
+
+       return analyze_statuses[HANA_STATUS_UNKNOWN].desc;
+}
+
 #define SSP_O_VIA      0x0001
 #define SSP_O_HCHK     0x0002
 #define SSP_O_STATUS   0x0004
@@ -136,7 +169,8 @@ static void server_status_printf(struct chunk *msg, struct server *s, unsigned o
                        chunk_printf(msg, "\"");
                }
 
-               chunk_printf(msg, ", check duration: %lums", s->check_duration);
+               if (s->check_duration >= 0)
+                       chunk_printf(msg, ", check duration: %ldms", s->check_duration);
        }
 
        if (options & SSP_O_STATUS) {
@@ -184,9 +218,11 @@ static void set_server_check_status(struct server *s, short status, char *desc)
 
        s->check_status = status;
        if (check_statuses[status].result)
-               s->result |= check_statuses[status].result;
+               s->result = check_statuses[status].result;
 
-       if (!tv_iszero(&s->check_start)) {
+       if (status == HCHK_STATUS_HANA)
+               s->check_duration = -1;
+       else if (!tv_iszero(&s->check_start)) {
                /* set_server_check_status() may be called more than once */
                s->check_duration = tv_ms_elapsed(&s->check_start, &now);
                tv_zero(&s->check_start);
@@ -229,6 +265,10 @@ static void set_server_check_status(struct server *s, short status, char *desc)
                                if (health >= rise)
                                        health = rise + fall - 1; /* OK now */
                        }
+
+                       /* clear consecutive_errors if observing is enabled */
+                       if (s->onerror)
+                               s->consecutive_errors = 0;
                }
                /* FIXME end: calculate local version of the health/rise/fall/state */
 
@@ -505,6 +545,96 @@ static void set_server_enabled(struct server *s) {
                        set_server_enabled(srv);
 }
 
+/*
+ * Adjust the health of server <s> after observing the traffic event <status>
+ * (one of HANA_STATUS_*). Nothing is done unless both observing and health
+ * checks are enabled on the server. An event rated "OK" for the configured
+ * observing mode clears the consecutive error counter; an event rated "error"
+ * increments it. Once the counter reaches consecutive_errors_limit, the action
+ * selected by s->onerror is applied, the counter is cleared, failed_hana is
+ * incremented and, if fastinter is set, the next check is pulled closer.
+ */
+void health_adjust(struct server *s, short status) {
+
+       int failed;
+       int expire;
+
+       /* return now if either observing or health checks are not enabled */
+       if (!s->observe || !s->check)
+               return;
+
+       if (s->observe >= HANA_OBS_SIZE)
+               return;
+
+       /* <status> indexes analyze_statuses[], which holds HANA_STATUS_SIZE
+        * entries (HCHK_STATUS_SIZE sizes check_statuses[], a different table),
+        * and it is signed, so both bounds have to be checked.
+        */
+       if (status < 0 || status >= HANA_STATUS_SIZE || !analyze_statuses[status].desc)
+               return;
+
+       /* lr[] is indexed by observing mode minus one (HANA_OBS_NONE excluded) */
+       switch (analyze_statuses[status].lr[s->observe - 1]) {
+               case 1:
+                       failed = 1;
+                       break;
+
+               case 2:
+                       failed = 0;
+                       break;
+
+               default:
+                       /* this event is not significant for this observing mode */
+                       return;
+       }
+
+       if (!failed) {
+               /* good: clear consecutive_errors */
+               s->consecutive_errors = 0;
+               return;
+       }
+
+       s->consecutive_errors++;
+
+       if (s->consecutive_errors < s->consecutive_errors_limit)
+               return;
+
+       sprintf(trash, "Detected %d consecutive errors, last one was: %s",
+               s->consecutive_errors, get_analyze_status(status));
+
+       switch (s->onerror) {
+               case HANA_ONERR_FASTINTER:
+               /* force fastinter - nothing to do here as all modes force it */
+                       break;
+
+               case HANA_ONERR_SUDDTH:
+               /* simulate a pre-fatal failed health check */
+                       if (s->health > s->rise)
+                               s->health = s->rise + 1;
+
+                       /* no break - fall through */
+
+               case HANA_ONERR_FAILCHK:
+               /* simulate a failed health check */
+                       set_server_check_status(s, HCHK_STATUS_HANA, trash);
+
+                       if (s->health > s->rise) {
+                               s->health--; /* still good */
+                               s->counters.failed_checks++;
+                       }
+                       else
+                               set_server_down(s);
+
+                       break;
+
+               case HANA_ONERR_MARKDWN:
+               /* mark server down */
+                       s->health = s->rise;
+                       set_server_check_status(s, HCHK_STATUS_HANA, trash);
+                       set_server_down(s);
+
+                       break;
+
+               default:
+                       /* write a warning? */
+                       break;
+       }
+
+       s->consecutive_errors = 0;
+       s->counters.failed_hana++;
+
+       /* only bring the next check forward, never push it back */
+       if (s->fastinter) {
+               expire = tick_add(now_ms, MS_TO_TICKS(s->fastinter));
+               if (s->check->expire > expire)
+                       s->check->expire = expire;
+       }
+}
+
 /*
  * This function is used only for server health-checks. It handles
  * the connection acknowledgement. If the proxy requires L7 health-checks,
index 866f4996178959e427801a42cfeaaea91217ec44..fec189a2035bb6b174045c461bf71834d0bf0d45 100644 (file)
@@ -244,7 +244,7 @@ int print_csv_header(struct chunk *msg)
                            "pid,iid,sid,throttle,lbtot,tracked,type,"
                            "rate,rate_lim,rate_max,"
                            "check_status,check_code,check_duration,"
-                           "hrsp_1xx,hrsp_2xx,hrsp_3xx,hrsp_4xx,hrsp_5xx,hrsp_other,"
+                           "hrsp_1xx,hrsp_2xx,hrsp_3xx,hrsp_4xx,hrsp_5xx,hrsp_other,hanafail,"
                            "\n");
 }
 
@@ -1370,6 +1370,9 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri)
                                        chunk_printf(&msg, ",,,,,,");
                                }
 
+                               /* failed health analyses */
+                               chunk_printf(&msg, ",");
+
                                /* finish with EOL */
                                chunk_printf(&msg, "\n");
                        }
@@ -1457,6 +1460,8 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri)
                                     ",,,"
                                     /* http response: 1xx, 2xx, 3xx, 4xx, 5xx, other */
                                     ",,,,,,"
+                                    /* failed health analyses */
+                                    ","
                                     "\n",
                                     px->id, l->name,
                                     l->nbconn, l->counters->conn_max,
@@ -1610,7 +1615,7 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri)
                                        if (sv->check_status >= HCHK_STATUS_L57DATA)
                                                chunk_printf(&msg, "/%d", sv->check_code);
 
-                                       if (sv->check_status >= HCHK_STATUS_CHECKED)
+                                       if (sv->check_status >= HCHK_STATUS_CHECKED && sv->check_duration >= 0)
                                                chunk_printf(&msg, " in %lums", sv->check_duration);
                                } else {
                                        chunk_printf(&msg, "</td><td>");
@@ -1629,11 +1634,11 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri)
                                /* check failures: unique, fatal, down time */
                                if (sv->state & SRV_CHECKED)
                                        chunk_printf(&msg,
-                                            "<td>%lld</td><td>%lld</td>"
-                                            "<td>%s</td>"
+                                            "<td title=\"Failed Health Checks/Health Analyses\">%lld/%lld</td>"
+                                            "<td>%lld</td><td>%s</td>"
                                             "",
-                                            svs->counters.failed_checks, svs->counters.down_trans,
-                                            human_time(srv_downtime(sv), 1));
+                                            svs->counters.failed_checks, svs->counters.failed_hana,
+                                            svs->counters.down_trans, human_time(srv_downtime(sv), 1));
                                else if (sv != svs)
                                        chunk_printf(&msg,
                                             "<td class=ac colspan=3>via %s/%s</td>", svs->proxy->id, svs->id);
@@ -1772,6 +1777,9 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri)
                                        chunk_printf(&msg, ",,,,,,");
                                }
 
+                               /* failed health analyses */
+                               chunk_printf(&msg, "%lld,",  sv->counters.failed_hana);
+
                                /* finish with EOL */
                                chunk_printf(&msg, "\n");
                        }
@@ -1919,6 +1927,9 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri)
                                        chunk_printf(&msg, ",,,,,,");
                                }
 
+                               /* failed health analyses */
+                               chunk_printf(&msg, ",");
+
                                /* finish with EOL */
                                chunk_printf(&msg, "\n");
 
index e4662a3f386ce184e59bef1c8074f3d034307fea..8ad440102e7e347ae234cde9d234c39375041f04 100644 (file)
@@ -41,6 +41,7 @@
 #include <proto/acl.h>
 #include <proto/backend.h>
 #include <proto/buffers.h>
+#include <proto/checks.h>
 #include <proto/client.h>
 #include <proto/dumpstats.h>
 #include <proto/fd.h>
@@ -2948,8 +2949,10 @@ int http_wait_for_response(struct session *s, struct buffer *rep, int an_bit)
                                http_capture_bad_message(&s->be->invalid_rep, s, rep, msg, s->fe);
 
                        s->be->counters.failed_resp++;
-                       if (s->srv)
+                       if (s->srv) {
                                s->srv->counters.failed_resp++;
+                               health_adjust(s->srv, HANA_STATUS_HTTP_HDRRSP);
+                       }
 
                        rep->analysers = 0;
                        txn->status = 502;
@@ -2974,8 +2977,10 @@ int http_wait_for_response(struct session *s, struct buffer *rep, int an_bit)
                                http_capture_bad_message(&s->be->invalid_rep, s, rep, msg, s->fe);
 
                        s->be->counters.failed_resp++;
-                       if (s->srv)
+                       if (s->srv) {
                                s->srv->counters.failed_resp++;
+                               health_adjust(s->srv, HANA_STATUS_HTTP_READ_ERROR);
+                       }
 
                        rep->analysers = 0;
                        txn->status = 502;
@@ -2994,8 +2999,10 @@ int http_wait_for_response(struct session *s, struct buffer *rep, int an_bit)
                                http_capture_bad_message(&s->be->invalid_rep, s, rep, msg, s->fe);
 
                        s->be->counters.failed_resp++;
-                       if (s->srv)
+                       if (s->srv) {
                                s->srv->counters.failed_resp++;
+                               health_adjust(s->srv, HANA_STATUS_HTTP_READ_TIMEOUT);
+                       }
 
                        rep->analysers = 0;
                        txn->status = 504;
@@ -3014,8 +3021,10 @@ int http_wait_for_response(struct session *s, struct buffer *rep, int an_bit)
                                http_capture_bad_message(&s->be->invalid_rep, s, rep, msg, s->fe);
 
                        s->be->counters.failed_resp++;
-                       if (s->srv)
+                       if (s->srv) {
                                s->srv->counters.failed_resp++;
+                               health_adjust(s->srv, HANA_STATUS_HTTP_BROKEN_PIPE);
+                       }
 
                        rep->analysers = 0;
                        txn->status = 502;
@@ -3070,6 +3079,11 @@ int http_wait_for_response(struct session *s, struct buffer *rep, int an_bit)
 
        txn->status = strl2ui(rep->data + msg->sl.st.c, msg->sl.st.c_l);
 
+       if (txn->status >= 100 && txn->status < 500)
+               health_adjust(s->srv, HANA_STATUS_HTTP_OK);
+       else
+               health_adjust(s->srv, HANA_STATUS_HTTP_STS);
+
        /*
         * 2: check for cacheability.
         */
@@ -3257,8 +3271,10 @@ int http_process_res_common(struct session *t, struct buffer *rep, int an_bit, s
                        if (rule_set->rsp_exp != NULL) {
                                if (apply_filters_to_response(t, rep, rule_set->rsp_exp) < 0) {
                                return_bad_resp:
-                                       if (t->srv)
+                                       if (t->srv) {
                                                t->srv->counters.failed_resp++;
+                                               health_adjust(t->srv, HANA_STATUS_HTTP_RSP);
+                                       }
                                        cur_proxy->counters.failed_resp++;
                                return_srv_prx_502:
                                        rep->analysers = 0;
index 68da06336e431ce326e060f912e7be1f0d8f77d5..f13c321c59fd3cdc22a879d38c93c14cbff7cda4 100644 (file)
@@ -43,6 +43,7 @@
 #include <proto/acl.h>
 #include <proto/backend.h>
 #include <proto/buffers.h>
+#include <proto/checks.h>
 #include <proto/fd.h>
 #include <proto/log.h>
 #include <proto/port_range.h>
index 03285e25caf55508d6a7b0ff51dcf5b1da3bdc71..81e4a3206a76be7356cc18adcaf8b6a32d2bc220 100644 (file)
@@ -22,6 +22,7 @@
 #include <proto/acl.h>
 #include <proto/backend.h>
 #include <proto/buffers.h>
+#include <proto/checks.h>
 #include <proto/dumpstats.h>
 #include <proto/hdr_idx.h>
 #include <proto/log.h>
@@ -249,6 +250,8 @@ int sess_update_st_cer(struct session *s, struct stream_interface *si)
 {
        /* we probably have to release last session from the server */
        if (s->srv) {
+               health_adjust(s->srv, HANA_STATUS_L4_ERR);
+
                if (s->flags & SN_CURR_SESS) {
                        s->flags &= ~SN_CURR_SESS;
                        s->srv->cur_sess--;
@@ -327,6 +330,9 @@ void sess_establish(struct session *s, struct stream_interface *si)
        struct buffer *req = si->ob;
        struct buffer *rep = si->ib;
 
+       if (s->srv)
+               health_adjust(s->srv, HANA_STATUS_L4_OK);
+
        if (s->be->mode == PR_MODE_TCP) { /* let's allow immediate data connection in this case */
                buffer_set_rlim(rep, rep->size); /* no rewrite needed */