]> git.ipfire.org Git - thirdparty/knot-resolver.git/commitdiff
lib/selection: fix interaction of timeouts with reboots
authorVladimír Čunát <vladimir.cunat@nic.cz>
Mon, 7 Mar 2022 17:04:05 +0000 (18:04 +0100)
committerVladimír Čunát <vladimir.cunat@nic.cz>
Mon, 14 Mar 2022 06:40:37 +0000 (07:40 +0100)
We use "monotonic" time-stamps for the dead_since field;
that breaks on system reboots, in which case we reset the stats.
(if the server was categorized as dead)

If the server times out afterwards, we'd fail the condition
`cur_state.consecutive_timeouts == old_state.consecutive_timeouts`
so its stats would not update.  Therefore we'd get stuck forever
in a state where the unusable server has high priority (no_rtt_info).

This commit changes a bit more than was necessary to fix this,
including precision of the stats (in some cases).

NEWS
lib/selection.c
lib/selection.h
lib/selection_forward.c

diff --git a/NEWS b/NEWS
index 0d90b3cdf0cfec4811ecaac9453ea19df0e66533..7686faa788b3c695346ccad346da3031979b8f68 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -22,6 +22,7 @@ Bugfixes
 - doh2: fix CORS by adding `access-control-allow-origin: *` (!1246)
 - net: fix listen by interface - add interface suffix to link-local IPv6 (!1253)
 - daemon/tls: fix resumption for outgoing TLS (e.g. TLS_FORWARD) (!1261)
+- nameserver selection: fix interaction of timeouts with reboots (#722, !1269)
 
 
 Knot Resolver 5.4.4 (2022-01-05)
index 5bcc2fda3a398d5d25a8369aff4358c546e143a4..c6eb93eff58b482a95770efd5cf35cb45c2d928f 100644 (file)
@@ -19,7 +19,7 @@
 #define DEFAULT_TIMEOUT 400
 #define MAX_TIMEOUT 10000
 #define EXPLORE_TIMEOUT_COEFFICIENT 2
-#define MAX_BACKOFF 5
+#define MAX_BACKOFF 8
 #define MINIMAL_TIMEOUT_ADDITION 20
 
 /* After TCP_TIMEOUT_THRESHOLD timeouts one transport, we'll switch to TCP. */
@@ -255,7 +255,7 @@ static void invalidate_dead_upstream(struct address_state *state,
                                     unsigned int retry_timeout)
 {
        struct rtt_state *rs = &state->rtt_state;
-       if (rs->consecutive_timeouts >= KR_NS_TIMEOUT_ROW_DEAD) {
+       if (rs->dead_since) {
                uint64_t now = kr_now();
                if (now < rs->dead_since) {
                        // broken continuity of timestamp (reboot, different machine, etc.)
@@ -590,36 +590,50 @@ void update_rtt(struct kr_query *qry, struct address_state *addr_state,
        }
 }
 
-static void cache_timeout(const struct kr_query *qry, const struct kr_transport *transport,
+/// Update rtt_state (including caching) after a server timed out.
+static void server_timeout(const struct kr_query *qry, const struct kr_transport *transport,
                          struct address_state *addr_state, struct kr_cache *cache)
 {
-       if (transport->deduplicated) {
-               /* Transport was chosen by a different query, that one will
-                * cache the result. */
+       // Make sure that the timeout wasn't capped; see kr_transport::timeout_capped
+       if (transport->timeout_capped)
                return;
-       }
 
-       uint8_t *address = ip_to_bytes(&transport->address, transport->address_len);
+       const uint8_t *address = ip_to_bytes(&transport->address, transport->address_len);
        if (transport->address_len == sizeof(struct in6_addr))
                no6_timed_out(qry, address);
 
-       struct rtt_state old_state = addr_state->rtt_state;
-       struct rtt_state cur_state =
-               get_rtt_state(address, transport->address_len, cache);
-
-       /* We could lose some update from some other process by doing this,
-        * but at least timeout count can't blow up. */
-       if (cur_state.consecutive_timeouts == old_state.consecutive_timeouts) {
-               if (++cur_state.consecutive_timeouts >=
-                   KR_NS_TIMEOUT_ROW_DEAD) {
-                       cur_state.dead_since = kr_now();
-               }
-               put_rtt_state(address, transport->address_len, cur_state, cache);
+       struct rtt_state *state = &addr_state->rtt_state;
+       // While we were waiting for timeout, the stats might have changed considerably,
+       // so let's overwrite what we had by fresh cache contents.
+       // This is useful when the address is busy (we query it concurrently).
+       *state = get_rtt_state(address, transport->address_len, cache);
+
+       ++state->consecutive_timeouts;
+       // Avoid overflow; we don't utilize very high values anyway (arbitrary limit).
+       state->consecutive_timeouts = MIN(64, state->consecutive_timeouts);
+       if (state->consecutive_timeouts >= KR_NS_TIMEOUT_ROW_DEAD) {
+               // Only mark as dead if we waited long enough,
+               // so that many (concurrent) short attempts can't cause the dead state.
+               if (transport->timeout >= KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT)
+                       state->dead_since = kr_now();
+       }
+
+       // If transport was chosen by a different query, that one will cache it.
+       if (!transport->deduplicated) {
+               put_rtt_state(address, transport->address_len, *state, cache);
        } else {
-               /* `get_rtt_state` opens a cache transaction, we have to end it. */
-               kr_cache_commit(cache);
+               kr_cache_commit(cache); // Avoid any risk of long transaction.
        }
 }
+// Not everything can be checked in nice ways like static_assert()
+static __attribute__((constructor)) void test_RTT_consts(void)
+{
+       // See KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT above.
+       kr_require(
+               calc_timeout((struct rtt_state){ .consecutive_timeouts = MAX_BACKOFF, })
+                >= KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT
+       );
+}
 
 void error(struct kr_query *qry, struct address_state *addr_state,
           const struct kr_transport *transport,
@@ -644,12 +658,7 @@ void error(struct kr_query *qry, struct address_state *addr_state,
        case KR_SELECTION_TLS_HANDSHAKE_FAILED:
        case KR_SELECTION_QUERY_TIMEOUT:
                qry->server_selection.local_state->timeouts++;
-               /* Make sure that the query was chosen by this query and timeout wasn't capped
-                * (see kr_transport::timeout_capped for details). */
-               if (!transport->deduplicated && !transport->timeout_capped) {
-                       cache_timeout(qry, transport, addr_state,
-                                     &qry->request->ctx->cache);
-               }
+               server_timeout(qry, transport, addr_state, &qry->request->ctx->cache);
                break;
        case KR_SELECTION_FORMERR:
                if (qry->flags.NO_EDNS) {
index 9e712cc454b012f06e251875feaefacaf814c336..e35eb750662d2990b2ed90206aad08e80cba6070 100644 (file)
 
 #include "lib/cache/api.h"
 
-/* After KR_NS_TIMEOUT_ROW_DEAD consecutive timeouts, we consider the upstream IP dead for KR_NS_TIMEOUT_RETRY_INTERVAL ms */
+/* After KR_NS_TIMEOUT_ROW_DEAD consecutive timeouts
+ * where at least one was over KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT ms,
+ * we consider the upstream IP dead for KR_NS_TIMEOUT_RETRY_INTERVAL ms */
 #define KR_NS_TIMEOUT_ROW_DEAD 4
+#define KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT 800 /* == DEFAULT_TIMEOUT * 2 */
 #define KR_NS_TIMEOUT_RETRY_INTERVAL 1000
 
 /**
index baab3abd51b1a0e077080f104a02e92354aa237a..fe33c237313e5083ba86353ce75e655dc5cee091 100644 (file)
@@ -8,6 +8,11 @@
 #define VERBOSE_MSG(qry, ...) kr_log_q((qry), SELECTION, __VA_ARGS__)
 
 #define FORWARDING_TIMEOUT 2000
+/* TODO: well, this is a bit hard; maybe we'd better have a different approach
+ * for estimating DEAD-ness for forwarding.
+ * Even ACKs on connections might be useful here. */
+static_assert(FORWARDING_TIMEOUT >= KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT,
+               "Bad combination of NS selection limits.");
 
 struct forward_local_state {
        kr_sockaddr_array_t *targets;