From: Vladimír Čunát Date: Mon, 7 Mar 2022 17:04:05 +0000 (+0100) Subject: lib/selection: fix interaction of timeouts with reboots X-Git-Tag: v5.5.0~2^2~1 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=03d24b4b7e5d473bc988551ebc00d955dae01e2b;p=thirdparty%2Fknot-resolver.git lib/selection: fix interaction of timeouts with reboots We use "monotonic" time-stamps for the dead_since field; that breaks on system reboots, in which case we reset the stats. (if the server was categorized as dead) If the server times out afterwards, we'd fail the condition `cur_state.consecutive_timeouts == old_state.consecutive_timeouts` so its stats would not update. Therefore we'd get stuck forever in a state where the unusable server has high priority (no_rtt_info). This commit changes a bit more than was necessary to fix this, including precision of the stats (in some cases). --- diff --git a/NEWS b/NEWS index 0d90b3cdf..7686faa78 100644 --- a/NEWS +++ b/NEWS @@ -22,6 +22,7 @@ Bugfixes - doh2: fix CORS by adding `access-control-allow-origin: *` (!1246) - net: fix listen by interface - add interface suffix to link-local IPv6 (!1253) - daemon/tls: fix resumption for outgoing TLS (e.g. TLS_FORWARD) (!1261) +- nameserver selection: fix interaction of timeouts with reboots (#722, !1269) Knot Resolver 5.4.4 (2022-01-05) diff --git a/lib/selection.c b/lib/selection.c index 5bcc2fda3..c6eb93eff 100644 --- a/lib/selection.c +++ b/lib/selection.c @@ -19,7 +19,7 @@ #define DEFAULT_TIMEOUT 400 #define MAX_TIMEOUT 10000 #define EXPLORE_TIMEOUT_COEFFICIENT 2 -#define MAX_BACKOFF 5 +#define MAX_BACKOFF 8 #define MINIMAL_TIMEOUT_ADDITION 20 /* After TCP_TIMEOUT_THRESHOLD timeouts one transport, we'll switch to TCP. */ @@ -255,7 +255,7 @@ static void invalidate_dead_upstream(struct address_state *state, unsigned int retry_timeout) { struct rtt_state *rs = &state->rtt_state; - if (rs->consecutive_timeouts >= KR_NS_TIMEOUT_ROW_DEAD) { + if (rs->dead_since) { uint64_t now = kr_now(); if (now < rs->dead_since) { // broken continuity of timestamp (reboot, different machine, etc.) @@ -590,36 +590,50 @@ void update_rtt(struct kr_query *qry, struct address_state *addr_state, } } -static void cache_timeout(const struct kr_query *qry, const struct kr_transport *transport, +/// Update rtt_state (including caching) after a server timed out. +static void server_timeout(const struct kr_query *qry, const struct kr_transport *transport, struct address_state *addr_state, struct kr_cache *cache) { - if (transport->deduplicated) { - /* Transport was chosen by a different query, that one will - * cache the result. */ + // Make sure that the timeout wasn't capped; see kr_transport::timeout_capped + if (transport->timeout_capped) return; - } - uint8_t *address = ip_to_bytes(&transport->address, transport->address_len); + const uint8_t *address = ip_to_bytes(&transport->address, transport->address_len); if (transport->address_len == sizeof(struct in6_addr)) no6_timed_out(qry, address); - struct rtt_state old_state = addr_state->rtt_state; - struct rtt_state cur_state = - get_rtt_state(address, transport->address_len, cache); - - /* We could lose some update from some other process by doing this, - * but at least timeout count can't blow up. */ - if (cur_state.consecutive_timeouts == old_state.consecutive_timeouts) { - if (++cur_state.consecutive_timeouts >= - KR_NS_TIMEOUT_ROW_DEAD) { - cur_state.dead_since = kr_now(); - } - put_rtt_state(address, transport->address_len, cur_state, cache); + struct rtt_state *state = &addr_state->rtt_state; + // While we were waiting for timeout, the stats might have changed considerably, + // so let's overwrite what we had by fresh cache contents. + // This is useful when the address is busy (we query it concurrently). + *state = get_rtt_state(address, transport->address_len, cache); + + ++state->consecutive_timeouts; + // Avoid overflow; we don't utilize very high values anyway (arbitrary limit). + state->consecutive_timeouts = MIN(64, state->consecutive_timeouts); + if (state->consecutive_timeouts >= KR_NS_TIMEOUT_ROW_DEAD) { + // Only mark as dead if we waited long enough, + // so that many (concurrent) short attempts can't cause the dead state. + if (transport->timeout >= KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT) + state->dead_since = kr_now(); + } + + // If transport was chosen by a different query, that one will cache it. + if (!transport->deduplicated) { + put_rtt_state(address, transport->address_len, *state, cache); } else { - /* `get_rtt_state` opens a cache transaction, we have to end it. */ - kr_cache_commit(cache); + kr_cache_commit(cache); // Avoid any risk of long transaction. } } +// Not everything can be checked in nice ways like static_assert() +static __attribute__((constructor)) void test_RTT_consts(void) +{ + // See KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT above. + kr_require( + calc_timeout((struct rtt_state){ .consecutive_timeouts = MAX_BACKOFF, }) + >= KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT + ); +} void error(struct kr_query *qry, struct address_state *addr_state, const struct kr_transport *transport, @@ -644,12 +658,7 @@ void error(struct kr_query *qry, struct address_state *addr_state, case KR_SELECTION_TLS_HANDSHAKE_FAILED: case KR_SELECTION_QUERY_TIMEOUT: qry->server_selection.local_state->timeouts++; - /* Make sure that the query was chosen by this query and timeout wasn't capped - * (see kr_transport::timeout_capped for details). */ - if (!transport->deduplicated && !transport->timeout_capped) { - cache_timeout(qry, transport, addr_state, - &qry->request->ctx->cache); - } + server_timeout(qry, transport, addr_state, &qry->request->ctx->cache); break; case KR_SELECTION_FORMERR: if (qry->flags.NO_EDNS) { diff --git a/lib/selection.h b/lib/selection.h index 9e712cc45..e35eb7506 100644 --- a/lib/selection.h +++ b/lib/selection.h @@ -12,8 +12,11 @@ #include "lib/cache/api.h" -/* After KR_NS_TIMEOUT_ROW_DEAD consecutive timeouts, we consider the upstream IP dead for KR_NS_TIMEOUT_RETRY_INTERVAL ms */ +/* After KR_NS_TIMEOUT_ROW_DEAD consecutive timeouts + * where at least one was over KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT ms, + * we consider the upstream IP dead for KR_NS_TIMEOUT_RETRY_INTERVAL ms */ #define KR_NS_TIMEOUT_ROW_DEAD 4 +#define KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT 800 /* == DEFAULT_TIMEOUT * 2 */ #define KR_NS_TIMEOUT_RETRY_INTERVAL 1000 /** diff --git a/lib/selection_forward.c b/lib/selection_forward.c index baab3abd5..fe33c2373 100644 --- a/lib/selection_forward.c +++ b/lib/selection_forward.c @@ -8,6 +8,11 @@ #define VERBOSE_MSG(qry, ...) kr_log_q((qry), SELECTION, __VA_ARGS__) #define FORWARDING_TIMEOUT 2000 +/* TODO: well, this is a bit hard; maybe we'd better have a different approach + * for estimating DEAD-ness for forwarding. + * Even ACKs on connections might be useful here. */ +static_assert(FORWARDING_TIMEOUT >= KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT, + "Bad combination of NS selection limits."); struct forward_local_state { kr_sockaddr_array_t *targets;