From b30bd7adba321efa2534cab418069ce0b0cc284f Mon Sep 17 00:00:00 2001 From: Aurelien DARRAGON Date: Tue, 19 Sep 2023 10:51:53 +0200 Subject: [PATCH] MEDIUM: log/balance: support for the "hash" lb algorithm hash lb algorithm can be configured with the "log-balance hash <cnv_list>" directive. With this algorithm, the user specifies a converter list with <cnv_list>. The produced log message will be passed as-is to the provided converter list, and the resulting hash will be used to select the log server that will receive the log message. --- doc/configuration.txt | 26 +++++++++++++ include/haproxy/backend.h | 2 + src/backend.c | 15 +++++++- src/cfgparse-listen.c | 4 +- src/cfgparse.c | 7 +++- src/log.c | 78 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 126 insertions(+), 6 deletions(-) diff --git a/doc/configuration.txt b/doc/configuration.txt index 73652aa9bc..6f85c10134 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -8849,6 +8849,19 @@ log-balance <algorithm> [ <arguments> ] pool of available servers as it may avoid the hammering effect that could result from roundrobin in this situation. + hash <arguments> should be found in the form: <cnv_list> + e.g.: log-balance hash <cnv_list> + + Each log message will be passed to the converter list + specified in <cnv_list> (ie: "cnv1,cnv2..."), and it will + then be passed to haproxy hashing function according to + "hash-type" settings. The resulting hash will be used to + select the destination server among the ones declared in the + log backend. The goal of this algorithm is to be able to + extract a key within the final log message using string + converters and then be able to stick to the same server thanks + to the hash. Only "map-based" hashes are supported for now. + <arguments> is an optional list of arguments which may be needed by some algorithms. 
@@ -8862,6 +8875,7 @@ log-balance [ ] global log backend@mylog-rrb local0 # send all logs to mylog-rrb backend + log backend@mylog-hash local0 # send all logs to mylog-hash backend backend mylog-rrb mode log @@ -8870,6 +8884,18 @@ log-balance [ ] server s1 udp@127.0.0.1:514 # will receive 50% of log messages server s2 udp@127.0.0.1:514 + backend mylog-hash + mode log + + # extract "METHOD URL PROTO" at the end of the log message, + # and let haproxy hash it so that log messages generated from + # similar requests get sent to the same syslog server: + log-balance hash 'field(-2,\")' + + # server list here + server s1 127.0.0.1:514 + #... + log-format Specifies the log format string to use for traffic logs May be used in sections: defaults | frontend | listen | backend diff --git a/include/haproxy/backend.h b/include/haproxy/backend.h index a5623494fd..581c5a07df 100644 --- a/include/haproxy/backend.h +++ b/include/haproxy/backend.h @@ -147,6 +147,8 @@ static inline int srv_lb_status_changed(const struct server *srv) */ void set_backend_down(struct proxy *be); +unsigned int gen_hash(const struct proxy* px, const char* key, unsigned long len); + #endif /* _HAPROXY_BACKEND_H */ /* diff --git a/src/backend.c b/src/backend.c index 809d922271..94e0032fc9 100644 --- a/src/backend.c +++ b/src/backend.c @@ -70,7 +70,7 @@ int be_lastsession(const struct proxy *be) } /* helper function to invoke the correct hash method */ -static unsigned int gen_hash(const struct proxy* px, const char* key, unsigned long len) +unsigned int gen_hash(const struct proxy* px, const char* key, unsigned long len) { unsigned int hash; @@ -2855,8 +2855,19 @@ int backend_parse_log_balance(const char **args, char **err, struct proxy *curpr curproxy->lbprm.algo &= ~BE_LB_ALGO; curproxy->lbprm.algo |= BE_LB_ALGO_RND; } + else if (strcmp(args[0], "hash") == 0) { + if (!*args[1]) { + memprintf(err, "%s requires a converter list.", args[0]); + return -1; + } + curproxy->lbprm.algo &= ~BE_LB_ALGO; + 
curproxy->lbprm.algo |= BE_LB_ALGO_SMP; + + ha_free(&curproxy->lbprm.arg_str); + curproxy->lbprm.arg_str = strdup(args[1]); + } else { - memprintf(err, "only supports 'roundrobin', 'sticky', 'random', options"); + memprintf(err, "only supports 'roundrobin', 'sticky', 'random', 'hash' options"); return -1; } return 0; diff --git a/src/cfgparse-listen.c b/src/cfgparse-listen.c index f039a4eb37..7ed67aa328 100644 --- a/src/cfgparse-listen.c +++ b/src/cfgparse-listen.c @@ -2576,8 +2576,8 @@ stats_error_parsing: */ curproxy->lbprm.algo &= ~(BE_LB_HASH_TYPE | BE_LB_HASH_FUNC | BE_LB_HASH_MOD); - if (curproxy->mode != PR_MODE_TCP && curproxy->mode != PR_MODE_HTTP) { - ha_alert("parsing [%s:%d] : '%s' requires TCP or HTTP mode.\n", file, linenum, args[0]); + if (curproxy->mode != PR_MODE_TCP && curproxy->mode != PR_MODE_HTTP && curproxy->mode != PR_MODE_SYSLOG) { + ha_alert("parsing [%s:%d] : '%s' requires TCP, HTTP or LOG mode.\n", file, linenum, args[0]); err_code |= ERR_ALERT | ERR_FATAL; goto out; } diff --git a/src/cfgparse.c b/src/cfgparse.c index 963894e0c4..ef18bfbeef 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -3510,8 +3510,11 @@ out_uri_auth_compat: curproxy->conf.args.line = 0; } - /* "balance hash" needs to compile its expression */ - if ((curproxy->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_SMP) { + /* "balance hash" needs to compile its expression + * (log backends will handle this in proxy log postcheck) + */ + if (curproxy->mode != PR_MODE_SYSLOG && + (curproxy->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_SMP) { int idx = 0; const char *args[] = { curproxy->lbprm.arg_str, diff --git a/src/log.c b/src/log.c index 53011f7f7c..bcae3a6973 100644 --- a/src/log.c +++ b/src/log.c @@ -42,6 +42,7 @@ #include #include #include +#include #include /* global recv logs counter */ @@ -907,6 +908,65 @@ static int postcheck_log_backend(struct proxy *be) be->srv_act = 0; be->srv_bck = 0; + /* "log-balance hash" needs to compile its expression */ + if ((be->lbprm.algo & 
BE_LB_ALGO) == BE_LB_ALGO_SMP) { + struct sample_expr *expr; + char *expr_str = NULL; + char *err_str = NULL; + int idx = 0; + + /* only map-based hash method is supported for now */ + if ((be->lbprm.algo & BE_LB_HASH_TYPE) != BE_LB_HASH_MAP) { + memprintf(&msg, "unsupported hash method (from \"hash-type\")"); + err_code |= ERR_ALERT | ERR_FATAL; + goto end; + } + + /* a little bit of explanation about what we're going to do here: + * as the user gave us a list of converters, instead of the fetch+conv list + * tuple as we're used to, we need to insert a dummy fetch at the start of + * the converter list so that sample_parse_expr() is able to properly parse + * the expr. We're explicitly using str() as dummy fetch, since the input + * sample that will be passed to the converter list at runtime will be a + * string (the log message about to be sent). Doing so allows sample_parse_expr() + * to ensure that the provided converters will be compatible with string type. + */ + memprintf(&expr_str, "str(dummy),%s", be->lbprm.arg_str); + if (!expr_str) { + memprintf(&msg, "memory error during converter list argument parsing (from \"log-balance hash\")"); + err_code |= ERR_ALERT | ERR_FATAL; + goto end; + } + expr = sample_parse_expr((char*[]){expr_str, NULL}, &idx, + be->conf.file, + be->conf.line, + &err_str, NULL, NULL); + if (!expr) { + memprintf(&msg, "%s (from converter list argument in \"log-balance hash\")", err_str); + ha_free(&err_str); + err_code |= ERR_ALERT | ERR_FATAL; + ha_free(&expr_str); + goto end; + } + + /* We expect the log_message->conv_list expr to resolve as a binary-compatible + * value because its output will be passed to gen_hash() to compute the hash. + * + * So we check the last converter's output type to ensure that it can be + * converted into the expected type. Invalid output type will result in an + * error to prevent unexpected results during runtime. 
+ */ + if (sample_casts[smp_expr_output_type(expr)][SMP_T_BIN] == NULL) { + memprintf(&msg, "invalid output type at the end of converter list for \"log-balance hash\" directive"); + err_code |= ERR_ALERT | ERR_FATAL; + release_sample_expr(expr); + ha_free(&expr_str); + goto end; + } + ha_free(&expr_str); + be->lbprm.expr = expr; + } + /* finish the initialization of proxy's servers */ srv = be->srv; while (srv) { @@ -2120,6 +2180,24 @@ static inline void __do_send_log_backend(struct proxy *be, struct log_header hdr /* random mode */ targetid = statistical_prng() % nb_srv; } + else if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_SMP) { + struct sample result; + + /* log-balance hash */ + memset(&result, 0, sizeof(result)); + result.data.type = SMP_T_STR; + result.flags = SMP_F_CONST; + result.data.u.str.area = message; + result.data.u.str.data = size; + result.data.u.str.size = size + 1; /* with terminating NULL byte */ + if (sample_process_cnv(be->lbprm.expr, &result)) { + /* gen_hash takes binary input, ensure that we provide such value to it */ + if (result.data.type == SMP_T_BIN || sample_casts[result.data.type][SMP_T_BIN]) { + sample_casts[result.data.type][SMP_T_BIN](&result); + targetid = gen_hash(be, result.data.u.str.area, result.data.u.str.data) % nb_srv; + } + } + } skip_lb: -- 2.39.5