From: Vladimír Čunát
Date: Tue, 29 Dec 2020 08:28:16 +0000 (+0100)
Subject: lib/selection: be more careful around rtt_state.dead_since
X-Git-Tag: v5.3.0~30^2~4
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=846d576b7f2e560baf038b9cfbafdc4490bea45d;p=thirdparty%2Fknot-resolver.git

lib/selection: be more careful around rtt_state.dead_since

It's all because the timestamp that we're using isn't (guaranteed to be) meaningful across reboots or different machines, whereas our cache even persists by default.
---

diff --git a/lib/selection.c b/lib/selection.c
index 6dad1a296..0f88e15c8 100644
--- a/lib/selection.c
+++ b/lib/selection.c
@@ -179,8 +179,20 @@ static struct rtt_state calc_rtt_state(struct rtt_state old, unsigned new_rtt)
 static void invalidate_dead_upstream(struct address_state *state,
 				     unsigned int retry_timeout)
 {
-	if (kr_now() - state->rtt_state.dead_since < retry_timeout) {
-		state->generation = -1;
+	struct rtt_state *rs = &state->rtt_state;
+	if (rs->consecutive_timeouts >= KR_NS_TIMEOUT_ROW_DEAD) {
+		uint64_t now = kr_now();
+		if (now < rs->dead_since) {
+			// broken continuity of timestamp (reboot, different machine, etc.)
+			*rs = default_rtt_state;
+		} else if (now < rs->dead_since + retry_timeout) {
+			// period when we don't want to use the address
+			state->generation = -1;
+		} else {
+			assert(now >= rs->dead_since + retry_timeout);
+			// we allow to retry the server now
+			// TODO: perhaps tweak *rs?
+		}
 	}
 }
 