* Debugging
- anonkey
- quiet
+ - warn-blocked-traffic-after
- zero-warning
* HTTPClient
Do not display any message during startup. It is equivalent to the command-
line argument "-q".
+warn-blocked-traffic-after <time>
+ This allows to adjust the delay after which a stuck task blocking the traffic
+ will trigger the emission of a warning on the standard error output. The
+ delay is expressed in milliseconds and defaults to 1000 ms. Permitted values
+ must be comprised between 1 ms and 1000 ms included. Lower values will
+ trigger warnings frequently and higher ones will rarely. The watchdog will
+ kill a runaway task that fails to respond twice for one second anyway, so a
+ 1000 ms warning delay will normally not trigger any warning. It is
+ recommended to stay with values between 10 and 100ms to detect configuration
+ anomalies that may degrade the user's experience, causing long response times
+ or jerkiness on interactive sessions. For example, a poorly designed Lua
+ sample-fetch function doing heavy computations, or a very large map_reg or
+ map_regm map file with a very high evaluation cost may cause such trouble.
+ For comparison a TLS handshake can eat between one and two milliseconds, and
+ compressing a 16kB HTTP response buffer is around one millisecond. The output
+ contains a thread dump of the offending task with a backtrace and some
+ context that helps figure where the time is being spent.
+
zero-warning
When this option is set, HAProxy will refuse to start if any warning was
emitted while processing the configuration and applying it. It means that
" this must remain exceptional because the system's stability is now at risk.\n"
" Timers in logs may be reported incorrectly, spurious timeouts may happen,\n"
" some incoming connections may silently be dropped, health checks may\n"
- " randomly fail, and accesses to the CLI may block the whole process. Please\n"
- " check the trace below for any clues about configuration elements that need\n"
- " to be corrected:\n\n",
+ " randomly fail, and accesses to the CLI may block the whole process. The\n"
+ " blocking delay before emitting this warning may be adjusted via the global\n"
+ " 'warn-blocked-traffic-after' directive. Please check the trace below for\n"
+ " any clues about configuration elements that need to be corrected:\n\n",
thr + 1, (n - p) / 1000000ULL,
HA_ATOMIC_LOAD(&ha_thread_ctx[thr].stream_cnt));
#include <haproxy/activity.h>
#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
#include <haproxy/clock.h>
#include <haproxy/debug.h>
#include <haproxy/errors.h>
uint prev_ctxsw;
} per_thread_wd_ctx[MAX_THREADS];
+/* warn about stuck tasks after this delay (ns) */
+static unsigned int wdt_warn_blocked_traffic_ns = 1000000000U;
+
/* Setup (or ping) the watchdog timer for thread <thr>. Returns non-zero on
* success, zero on failure. It interrupts once per second of CPU time. It
* happens that timers based on the CPU time are not automatically re-armed
{
struct itimerspec its;
- its.it_value.tv_sec = 1; its.it_value.tv_nsec = 0;
+ its.it_value.tv_sec = wdt_warn_blocked_traffic_ns / 1000000000U;
+ its.it_value.tv_nsec = wdt_warn_blocked_traffic_ns % 1000000000U;
its.it_interval.tv_sec = 0; its.it_interval.tv_nsec = 0;
return timer_settime(per_thread_wd_ctx[thr].timer, 0, &its, NULL) == 0;
}
/* not yet reached the deadline of 1 sec,
* or p wasn't initialized yet
*/
- if (!p || n - p < 1000000000UL)
+ if (!p)
goto update_and_leave;
if ((_HA_ATOMIC_LOAD(&ha_thread_ctx[thr].flags) & TH_FL_SLEEPING) ||
if (!(_HA_ATOMIC_LOAD(&ha_thread_ctx[thr].flags) & TH_FL_STUCK)) {
uint prev_ctxsw;
- _HA_ATOMIC_OR(&ha_thread_ctx[thr].flags, TH_FL_STUCK);
prev_ctxsw = HA_ATOMIC_LOAD(&per_thread_wd_ctx[tid].prev_ctxsw);
- if (HA_ATOMIC_LOAD(&activity[thr].ctxsw) == prev_ctxsw)
- ha_stuck_warning(thr);
- HA_ATOMIC_STORE(&activity[thr].ctxsw, prev_ctxsw);
+
+ /* only after one second it's clear we're stuck */
+ if (n - p >= 1000000000ULL)
+ _HA_ATOMIC_OR(&ha_thread_ctx[thr].flags, TH_FL_STUCK);
+
+ /* have we crossed the warning boundary ? If so we note were we
+ * where, and second time called from the same place will trigger
+ * a warning (unless already stuck).
+ */
+ if (n - p >= (ullong)wdt_warn_blocked_traffic_ns) {
+ if (HA_ATOMIC_LOAD(&activity[thr].ctxsw) == prev_ctxsw)
+ ha_stuck_warning(thr);
+ HA_ATOMIC_STORE(&activity[thr].ctxsw, prev_ctxsw);
+ }
+
goto update_and_leave;
}
wdt_ping(thr);
}
+/* parse the "warn-blocked-traffic-after" parameter */
+static int wdt_parse_warn_blocked(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ const char *res;
+ uint value;
+
+ if (!*args[1]) {
+ memprintf(err, "'%s' expects <time> as argument between 1 and 1000 ms.\n", args[0]);
+ return -1;
+ }
+
+ res = parse_time_err(args[1], &value, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 1000 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum value is 1 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.\n", *res, args[0]);
+ return -1;
+ }
+ else if (value > 1000 || value < 1) {
+ memprintf(err, "timer out of range in argument '%s' to '%s' (value must be between 1 and 1000 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+
+ wdt_warn_blocked_traffic_ns = value * 1000000U;
+ return 0;
+}
+
int init_wdt_per_thread()
{
if (!clock_setup_signal_timer(&per_thread_wd_ctx[tid].timer, WDTSIG, tid))
return ERR_NONE;
}
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "warn-blocked-traffic-after", wdt_parse_warn_blocked },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
REGISTER_POST_CHECK(init_wdt);
REGISTER_PER_THREAD_INIT(init_wdt_per_thread);
REGISTER_PER_THREAD_DEINIT(deinit_wdt_per_thread);