Default 5 minutes (but off).
Randomize the delays +-25%.
kr_layer_t kr_layer_t_static;
_Bool kr_dbg_assumption_abort;
-_Bool kr_dbg_assumption_fork;
+int kr_dbg_assumption_fork;
typedef int32_t (*kr_stale_cb)(int32_t ttl, const knot_dname_t *owner, uint16_t type,
const struct kr_query *qry);
printf "
kr_layer_t kr_layer_t_static;
_Bool kr_dbg_assumption_abort;
-_Bool kr_dbg_assumption_fork;
+int kr_dbg_assumption_fork;
"
printf "
Kresd uses *assumptions*, which are checks that should always pass and indicate
some weird or unexpected state if they don't. In such cases, they show up in
-the log as errors. By default, the process recovers from those states, but the
+the log as errors. By default, the process recovers from those states if possible, but the
behaviour can be changed with the following options to aid further debugging.
.. envvar:: debugging.assumption_abort = false|true
- :return: boolean (default: false)
+ :return: boolean (default: false in meson's release mode, true otherwise)
Allow the process to be aborted in case it encounters a failed assumption.
+ (Some critical conditions always lead to abortion, regardless of settings.)
-.. envvar:: debugging.assumption_fork = true|false
+.. envvar:: debugging.assumption_fork = milliseconds
- :return: boolean (default: true)
+ :return: int (default: 5 minutes in meson's release mode, 0 otherwise)
If a proccess should be aborted, it can be done in two ways. When this is
- set to true (default), a child is forked and aborted to obtain a coredump,
+ set to nonzero (default), a child is forked and aborted to obtain a coredump,
while the parent process recovers and keeps running. This can be useful to
debug a rare issue that occurs in production, since it doesn't affect the
main process.
+
+ As the dumping can be costly, the value is a lower bound on delay between
+ consecutive coredumps of each process. It is randomized by +-25% each time.
/* Logging & debugging */
bool kr_verbose_status = false;
bool kr_dbg_assumption_abort = DBG_ASSUMPTION_ABORT;
-bool kr_dbg_assumption_fork = DBG_ASSUMPTION_FORK;
+int kr_dbg_assumption_fork = DBG_ASSUMPTION_FORK;
void kr_fail(bool is_fatal, const char *expr, const char *func, const char *file, int line)
{
if (is_fatal || (kr_dbg_assumption_abort && !kr_dbg_assumption_fork))
abort();
- else if (kr_dbg_assumption_abort && kr_dbg_assumption_fork)
- fork() == 0 ? abort() : (void)0;
+ else if (!kr_dbg_assumption_abort || !kr_dbg_assumption_fork)
+ return;
+ // We want to fork and abort the child, unless rate-limited.
+ static uint64_t limited_until = 0;
+ const uint64_t now = kr_now();
+ if (now < limited_until)
+ return;
+ if (kr_dbg_assumption_fork > 0) {
+ // Add jitter +- 25%; in other words: 75% + uniform(0,50%).
+ // Motivation: if a persistent problem starts happening, desynchronize
+ // coredumps from different instances as they're not cheap.
+ limited_until = now + kr_dbg_assumption_fork * 3 / 4
+ + kr_dbg_assumption_fork * kr_rand_bytes(1) / 256 / 2;
+ }
+ if (fork() == 0)
+ abort();
}
/*
/** Whether kr_assume() checks should abort. */
KR_EXPORT extern bool kr_dbg_assumption_abort;
-/** Whether kr_assume() should fork the process before issuing abort (if configured).
+/** How often kr_assume() should fork the process before issuing abort (if configured).
*
- * This can be useful for debugging rare edge-cases in production. When both
- * kr_debug_assumption_abort and kr_debug_assumption_fork are set to true, it is
+ * This can be useful for debugging rare edge-cases in production.
+ * if (kr_debug_assumption_abort && kr_debug_assumption_fork), it is
* possible to both obtain a coredump (from forked child) and recover from the
- * non-fatal error in the parent process. */
-KR_EXPORT extern bool kr_dbg_assumption_fork;
+ * non-fatal error in the parent process.
+ *
+ * == 0 (false): no forking
+ * > 0: minimum delay between forks
+ * (in milliseconds, each instance separately, randomized +-25%)
+ * < 0: no rate-limiting (not recommended)
+ */
+KR_EXPORT extern int kr_dbg_assumption_fork;
/** Use kr_require() and kr_assume() instead of directly this function. */
KR_EXPORT KR_COLD void kr_fail(bool is_fatal, const char* expr, const char *func,
conf_data.set('ENABLE_CAP_NG', capng.found().to_int())
conf_data.set('ENABLE_DOH2', nghttp2.found().to_int())
conf_data.set('DBG_ASSUMPTION_ABORT', get_option('debug').to_int())
-conf_data.set('DBG_ASSUMPTION_FORK', (not get_option('debug')).to_int())
+if get_option('debug')
+ conf_data.set('DBG_ASSUMPTION_FORK', '0')
+else
+ conf_data.set('DBG_ASSUMPTION_FORK', '(5 * 60 * 1000) /* five minutes */')
+endif
kresconfig = configure_file(
output: 'kresconfig.h',