lib/selection: fix interaction of timeouts with reboots

author Vladimír Čunát <vladimir.cunat@nic.cz>

Mon, 7 Mar 2022 17:04:05 +0000 (18:04 +0100)

committer Vladimír Čunát <vladimir.cunat@nic.cz>

Mon, 14 Mar 2022 06:40:37 +0000 (07:40 +0100)
author Vladimír Čunát <vladimir.cunat@nic.cz>
Mon, 7 Mar 2022 17:04:05 +0000 (18:04 +0100)
committer Vladimír Čunát <vladimir.cunat@nic.cz>
Mon, 14 Mar 2022 06:40:37 +0000 (07:40 +0100)
diff --git a/NEWS b/NEWS

index 0d90b3cdf0cfec4811ecaac9453ea19df0e66533..7686faa788b3c695346ccad346da3031979b8f68 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -22,6 +22,7 @@ Bugfixes
  - doh2: fix CORS by adding `access-control-allow-origin: *` (!1246)
  - net: fix listen by interface - add interface suffix to link-local IPv6 (!1253)
  - daemon/tls: fix resumption for outgoing TLS (e.g. TLS_FORWARD) (!1261)
+- nameserver selection: fix interaction of timeouts with reboots (#722, !1269)
  
  
  Knot Resolver 5.4.4 (2022-01-05)
diff --git a/lib/selection.c b/lib/selection.c

index 5bcc2fda3a398d5d25a8369aff4358c546e143a4..c6eb93eff58b482a95770efd5cf35cb45c2d928f 100644 (file)
--- a/lib/selection.c
+++ b/lib/selection.c
@@ -19,7 +19,7 @@
  #define DEFAULT_TIMEOUT 400
  #define MAX_TIMEOUT 10000
  #define EXPLORE_TIMEOUT_COEFFICIENT 2
-#define MAX_BACKOFF 5
+#define MAX_BACKOFF 8
  #define MINIMAL_TIMEOUT_ADDITION 20
  
  /* After TCP_TIMEOUT_THRESHOLD timeouts one transport, we'll switch to TCP. */
@@ -255,7 +255,7 @@ static void invalidate_dead_upstream(struct address_state *state,
                                      unsigned int retry_timeout)
  {
         struct rtt_state *rs = &state->rtt_state;
-       if (rs->consecutive_timeouts >= KR_NS_TIMEOUT_ROW_DEAD) {
+       if (rs->dead_since) {
                 uint64_t now = kr_now();
                 if (now < rs->dead_since) {
                         // broken continuity of timestamp (reboot, different machine, etc.)
@@ -590,36 +590,50 @@ void update_rtt(struct kr_query *qry, struct address_state *addr_state,
         }
  }
  
-static void cache_timeout(const struct kr_query *qry, const struct kr_transport *transport,
+/// Update rtt_state (including caching) after a server timed out.
+static void server_timeout(const struct kr_query *qry, const struct kr_transport *transport,
                           struct address_state *addr_state, struct kr_cache *cache)
  {
-       if (transport->deduplicated) {
-               /* Transport was chosen by a different query, that one will
-                * cache the result. */
+       // Make sure that the timeout wasn't capped; see kr_transport::timeout_capped
+       if (transport->timeout_capped)
                 return;
-       }
  
-       uint8_t *address = ip_to_bytes(&transport->address, transport->address_len);
+       const uint8_t *address = ip_to_bytes(&transport->address, transport->address_len);
         if (transport->address_len == sizeof(struct in6_addr))
                 no6_timed_out(qry, address);
  
-       struct rtt_state old_state = addr_state->rtt_state;
-       struct rtt_state cur_state =
-               get_rtt_state(address, transport->address_len, cache);
-
-       /* We could lose some update from some other process by doing this,
-        * but at least timeout count can't blow up. */
-       if (cur_state.consecutive_timeouts == old_state.consecutive_timeouts) {
-               if (++cur_state.consecutive_timeouts >=
-                   KR_NS_TIMEOUT_ROW_DEAD) {
-                       cur_state.dead_since = kr_now();
-               }
-               put_rtt_state(address, transport->address_len, cur_state, cache);
+       struct rtt_state *state = &addr_state->rtt_state;
+       // While we were waiting for timeout, the stats might have changed considerably,
+       // so let's overwrite what we had by fresh cache contents.
+       // This is useful when the address is busy (we query it concurrently).
+       *state = get_rtt_state(address, transport->address_len, cache);
+
+       ++state->consecutive_timeouts;
+       // Avoid overflow; we don't utilize very high values anyway (arbitrary limit).
+       state->consecutive_timeouts = MIN(64, state->consecutive_timeouts);
+       if (state->consecutive_timeouts >= KR_NS_TIMEOUT_ROW_DEAD) {
+               // Only mark as dead if we waited long enough,
+               // so that many (concurrent) short attempts can't cause the dead state.
+               if (transport->timeout >= KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT)
+                       state->dead_since = kr_now();
+       }
+
+       // If transport was chosen by a different query, that one will cache it.
+       if (!transport->deduplicated) {
+               put_rtt_state(address, transport->address_len, *state, cache);
         } else {
-               /* `get_rtt_state` opens a cache transaction, we have to end it. */
-               kr_cache_commit(cache);
+               kr_cache_commit(cache); // Avoid any risk of long transaction.
         }
  }
+// Not everything can be checked in nice ways like static_assert()
+static __attribute__((constructor)) void test_RTT_consts(void)
+{
+       // See KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT above.
+       kr_require(
+               calc_timeout((struct rtt_state){ .consecutive_timeouts = MAX_BACKOFF, })
+                >= KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT
+       );
+}
  
  void error(struct kr_query *qry, struct address_state *addr_state,
            const struct kr_transport *transport,
@@ -644,12 +658,7 @@ void error(struct kr_query *qry, struct address_state *addr_state,
         case KR_SELECTION_TLS_HANDSHAKE_FAILED:
         case KR_SELECTION_QUERY_TIMEOUT:
                 qry->server_selection.local_state->timeouts++;
-               /* Make sure that the query was chosen by this query and timeout wasn't capped
-                * (see kr_transport::timeout_capped for details). */
-               if (!transport->deduplicated && !transport->timeout_capped) {
-                       cache_timeout(qry, transport, addr_state,
-                                     &qry->request->ctx->cache);
-               }
+               server_timeout(qry, transport, addr_state, &qry->request->ctx->cache);
                 break;
         case KR_SELECTION_FORMERR:
                 if (qry->flags.NO_EDNS) {
diff --git a/lib/selection.h b/lib/selection.h

index 9e712cc454b012f06e251875feaefacaf814c336..e35eb750662d2990b2ed90206aad08e80cba6070 100644 (file)
--- a/lib/selection.h
+++ b/lib/selection.h
@@ -12,8 +12,11 @@
  
  #include "lib/cache/api.h"
  
-/* After KR_NS_TIMEOUT_ROW_DEAD consecutive timeouts, we consider the upstream IP dead for KR_NS_TIMEOUT_RETRY_INTERVAL ms */
+/* After KR_NS_TIMEOUT_ROW_DEAD consecutive timeouts
+ * where at least one was over KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT ms,
+ * we consider the upstream IP dead for KR_NS_TIMEOUT_RETRY_INTERVAL ms */
  #define KR_NS_TIMEOUT_ROW_DEAD 4
+#define KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT 800 /* == DEFAULT_TIMEOUT * 2 */
  #define KR_NS_TIMEOUT_RETRY_INTERVAL 1000
  
  /**
diff --git a/lib/selection_forward.c b/lib/selection_forward.c

index baab3abd51b1a0e077080f104a02e92354aa237a..fe33c237313e5083ba86353ce75e655dc5cee091 100644 (file)
--- a/lib/selection_forward.c
+++ b/lib/selection_forward.c
@@ -8,6 +8,11 @@
  #define VERBOSE_MSG(qry, ...) kr_log_q((qry), SELECTION, __VA_ARGS__)
  
  #define FORWARDING_TIMEOUT 2000
+/* TODO: well, this is a bit hard; maybe we'd better have a different approach
+ * for estimating DEAD-ness for forwarding.
+ * Even ACKs on connections might be useful here. */
+static_assert(FORWARDING_TIMEOUT >= KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT,
+               "Bad combination of NS selection limits.");
  
  struct forward_local_state {
         kr_sockaddr_array_t *targets;
author	Vladimír Čunát <vladimir.cunat@nic.cz>
	Mon, 7 Mar 2022 17:04:05 +0000 (18:04 +0100)
committer	Vladimír Čunát <vladimir.cunat@nic.cz>
	Mon, 14 Mar 2022 06:40:37 +0000 (07:40 +0100)
NEWS		patch \| blob \| blame \| history
lib/selection.c		patch \| blob \| blame \| history
lib/selection.h		patch \| blob \| blame \| history
lib/selection_forward.c		patch \| blob \| blame \| history