From: Aurelien DARRAGON Date: Thu, 13 Jun 2024 17:31:29 +0000 (+0200) Subject: DEBUG: hlua: distinguish burst timeout errors from exec timeout errors X-Git-Tag: v3.1-dev2~44 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=983513d901bb7511ea6b1e8c3bb00d58a9d432f2;p=thirdparty%2Fhaproxy.git DEBUG: hlua: distinguish burst timeout errors from exec timeout errors hlua burst timeout was introduced in 58e36e5b1 ("MEDIUM: hlua: introduce tune.lua.burst-timeout"). It is a safety measure that allows to detect when too much time is spent on a single lua execution (between 2 interruptions/yields), meaning that the current thread is not able to perform other tasks. Such scenario should be avoided because it will cause thread contention which may have negative performance impact and could cause the watchdog to trigger. When the burst timeout is exceeded, the current Lua execution is aborted and a timeout error is reported to the user. Unfortunately, the same error is currently being reported for cumulative (AKA execution) timeout and for burst timeout, which may be confusing to the user. Indeed, "execution timeout" error historically results from the current hlua context exceeding the total (cumulative) time it's allowed to run. It is set per lua context using the dedicated tunables: - tune.lua.session-timeout - tune.lua.task-timeout - tune.lua.service-timeout We've already faced an user report where the user was able to trigger the burst timeout and got "Lua task: execution timeout." error while the user didn't set cumulative timeout. Thus the error was actually confusing because it was indeed the burst timeout which was causing it due to the use of cpu-intensive call from within the task without sufficient manual "yield" keypoints around the cpu-intensive call to ensure it runs on a dedicated scheduler cycle. In this patch we make it so burst timeout related errors are reported as "burst timeout" errors instead of "execution timeout" errors (which in fact became the generic timeout errors catchall with 58e36e5b1). To do this, hlua_timer_check() now returns a different value depending if the exeeded timeout is the burst one or the cumulative one, which allows us to return either HLUA_E_ETMOUT or HLUA_E_BTMOUT in hlua_ctx_resume(). It should improve the situation described in GH #2356 and may possibly be backported with 58e36e5b1 to improve error reporting if it applies without resistance. --- diff --git a/include/haproxy/hlua-t.h b/include/haproxy/hlua-t.h index af54d86943..bbbc61f287 100644 --- a/include/haproxy/hlua-t.h +++ b/include/haproxy/hlua-t.h @@ -94,6 +94,7 @@ enum hlua_exec { HLUA_E_AGAIN, /* LUA yield, must resume the stack execution later, when the associatedtask is waked. */ HLUA_E_ETMOUT, /* Execution timeout */ + HLUA_E_BTMOUT, /* Burst timeout */ HLUA_E_NOMEM, /* Out of memory error */ HLUA_E_YIELD, /* LUA code try to yield, and this is not allowed */ HLUA_E_ERRMSG, /* LUA stack execution failed with a string error message diff --git a/src/hlua.c b/src/hlua.c index 8434ed73a1..0a0e2be751 100644 --- a/src/hlua.c +++ b/src/hlua.c @@ -564,15 +564,16 @@ static inline void hlua_timer_stop(struct hlua_timer *timer) hlua resume, ie: time between effective yields) * - then check for yield cumulative timeout * - * Returns 1 if the check succeeded and 0 if it failed - * (ie: timeout exceeded) + * Returns 1 if the check succeeded, 0 if it failed because cumulative + * timeout is exceeded, and -1 if it failed because burst timeout is + * exceeded. */ static inline int hlua_timer_check(const struct hlua_timer *timer) { uint32_t pburst = _hlua_time_burst(timer); /* pending burst time in ms */ if (hlua_timeout_burst && (timer->burst + pburst) > hlua_timeout_burst) - return 0; /* burst timeout exceeded */ + return -1; /* burst timeout exceeded */ if (timer->max && (timer->cumulative + timer->burst + pburst) > timer->max) return 0; /* cumulative timeout exceeded */ return 1; /* ok */ @@ -1883,6 +1884,7 @@ static struct hlua *hlua_stream_ctx_prepare(struct stream *s, int state_id) void hlua_hook(lua_State *L, lua_Debug *ar) { struct hlua *hlua; + int timer_check; /* Get hlua struct, or NULL if we execute from main lua state */ hlua = hlua_gethlua(L); @@ -1937,8 +1939,12 @@ void hlua_hook(lua_State *L, lua_Debug *ar) check_timeout: /* If we cannot yield, check the timeout. */ - if (!hlua_timer_check(&hlua->timer)) { - lua_pushfstring(L, "execution timeout"); + timer_check = hlua_timer_check(&hlua->timer); + if (timer_check <= 0) { + if (!timer_check) + lua_pushfstring(L, "execution timeout"); + else + lua_pushfstring(L, "burst timeout"); WILL_LJMP(lua_error(L)); } @@ -2037,10 +2043,18 @@ resume_execution: /* Check if the execution timeout is expired. If it is the case, we * break the Lua execution. */ - if (!hlua_timer_check(&lua->timer)) { - lua_settop(lua->T, 0); /* Empty the stack. */ - ret = HLUA_E_ETMOUT; - break; + { + int timer_check; + + timer_check = hlua_timer_check(&lua->timer); + if (timer_check <= 0) { + if (!timer_check) + ret = HLUA_E_ETMOUT; + else + ret = HLUA_E_BTMOUT; + lua_settop(lua->T, 0); /* Empty the stack. */ + break; + } } /* Process the forced yield. if the general yield is not allowed or * if no task were associated this the current Lua execution @@ -2137,6 +2151,7 @@ resume_execution: break; case HLUA_E_ETMOUT: + case HLUA_E_BTMOUT: case HLUA_E_NOMEM: case HLUA_E_YIELD: case HLUA_E_ERR: @@ -9243,6 +9258,9 @@ struct task *hlua_process_task(struct task *task, void *context, unsigned int st case HLUA_E_ETMOUT: SEND_ERR(NULL, "Lua task: execution timeout.\n"); goto err_task_abort; + case HLUA_E_BTMOUT: + SEND_ERR(NULL, "Lua task: burst timeout.\n"); + goto err_task_abort; case HLUA_E_ERRMSG: hlua_lock(hlua); SEND_ERR(NULL, "Lua task: %s.\n", hlua_tostring_safe(hlua->T, -1)); @@ -9475,6 +9493,10 @@ static void hlua_event_handler(struct hlua *hlua) SEND_ERR(NULL, "Lua event_hdl: execution timeout.\n"); break; + case HLUA_E_BTMOUT: + SEND_ERR(NULL, "Lua event_hdl: burst timeout.\n"); + break; + case HLUA_E_ERRMSG: hlua_lock(hlua); SEND_ERR(NULL, "Lua event_hdl: %s.\n", hlua_tostring_safe(hlua->T, -1)); @@ -10193,6 +10215,10 @@ static int hlua_sample_conv_wrapper(const struct arg *arg_p, struct sample *smp, SEND_ERR(stream->be, "Lua converter '%s': execution timeout.\n", fcn->name); return 0; + case HLUA_E_BTMOUT: + SEND_ERR(stream->be, "Lua converter '%s': burst timeout.\n", fcn->name); + return 0; + case HLUA_E_NOMEM: SEND_ERR(stream->be, "Lua converter '%s': out of memory error.\n", fcn->name); return 0; @@ -10328,6 +10354,10 @@ static int hlua_sample_fetch_wrapper(const struct arg *arg_p, struct sample *smp SEND_ERR(smp->px, "Lua sample-fetch '%s': execution timeout.\n", fcn->name); return 0; + case HLUA_E_BTMOUT: + SEND_ERR(smp->px, "Lua sample-fetch '%s': burst timeout.\n", fcn->name); + return 0; + case HLUA_E_NOMEM: SEND_ERR(smp->px, "Lua sample-fetch '%s': out of memory error.\n", fcn->name); return 0; @@ -10686,6 +10716,10 @@ static enum act_return hlua_action(struct act_rule *rule, struct proxy *px, SEND_ERR(px, "Lua function '%s': execution timeout.\n", rule->arg.hlua_rule->fcn->name); goto end; + case HLUA_E_BTMOUT: + SEND_ERR(px, "Lua function '%s': burst timeout.\n", rule->arg.hlua_rule->fcn->name); + goto end; + case HLUA_E_NOMEM: SEND_ERR(px, "Lua function '%s': out of memory error.\n", rule->arg.hlua_rule->fcn->name); goto end; @@ -10866,6 +10900,11 @@ void hlua_applet_tcp_fct(struct appctx *ctx) rule->arg.hlua_rule->fcn->name); goto error; + case HLUA_E_BTMOUT: + SEND_ERR(px, "Lua applet tcp '%s': burst timeout.\n", + rule->arg.hlua_rule->fcn->name); + goto error; + case HLUA_E_NOMEM: SEND_ERR(px, "Lua applet tcp '%s': out of memory error.\n", rule->arg.hlua_rule->fcn->name); @@ -11079,6 +11118,11 @@ void hlua_applet_http_fct(struct appctx *ctx) rule->arg.hlua_rule->fcn->name); goto error; + case HLUA_E_BTMOUT: + SEND_ERR(px, "Lua applet http '%s': burst timeout.\n", + rule->arg.hlua_rule->fcn->name); + goto error; + case HLUA_E_NOMEM: SEND_ERR(px, "Lua applet http '%s': out of memory error.\n", rule->arg.hlua_rule->fcn->name); @@ -11707,6 +11751,11 @@ static int hlua_cli_io_handler_fct(struct appctx *appctx) fcn->name); return 1; + case HLUA_E_BTMOUT: + SEND_ERR(NULL, "Lua converter '%s': burst timeout.\n", + fcn->name); + return 1; + case HLUA_E_NOMEM: SEND_ERR(NULL, "Lua converter '%s': out of memory error.\n", fcn->name); @@ -12160,6 +12209,10 @@ static int hlua_filter_new(struct stream *s, struct filter *filter) SEND_ERR(s->be, "Lua filter '%s' : 'new' execution timeout.\n", conf->reg->name); ret = 0; goto end; + case HLUA_E_BTMOUT: + SEND_ERR(s->be, "Lua filter '%s' : 'new' burst timeout.\n", conf->reg->name); + ret = 0; + goto end; case HLUA_E_NOMEM: SEND_ERR(s->be, "Lua filter '%s' : out of memory error.\n", conf->reg->name); ret = 0; @@ -12359,6 +12412,9 @@ static int hlua_filter_callback(struct stream *s, struct filter *filter, const c case HLUA_E_ETMOUT: SEND_ERR(s->be, "Lua filter '%s' : '%s' callback execution timeout.\n", conf->reg->name, fun); goto end; + case HLUA_E_BTMOUT: + SEND_ERR(s->be, "Lua filter '%s' : '%s' callback burst timeout.\n", conf->reg->name, fun); + goto end; case HLUA_E_NOMEM: SEND_ERR(s->be, "Lua filter '%s' : out of memory error.\n", conf->reg->name); goto end;