From 498cc8ab88b1979eb344e1825fddbb201e2210b6 Mon Sep 17 00:00:00 2001 From: Wouter Wijngaards Date: Tue, 26 Oct 2010 15:02:08 +0000 Subject: [PATCH] - Change of timeout code. No more lost and backoff in blockage. At 12sec timeout (and at least 2x lost before) one probe per IP is allowed only. At 120sec, the IP is blocked. After 15min, a 120sec entry has a single retry packet. git-svn-id: file:///svn/unbound/trunk@2311 be551aaa-1e26-0410-a405-d3ace91eadb9 --- daemon/cachedump.c | 8 +++++--- daemon/remote.c | 5 +++-- doc/Changelog | 4 ++++ services/cache/infra.c | 39 ++++++++++++++++++++++++++++++++++++++- services/cache/infra.h | 8 +++++++- 5 files changed, 57 insertions(+), 7 deletions(-) diff --git a/daemon/cachedump.c b/daemon/cachedump.c index 43c1a9a23..85fe9f839 100644 --- a/daemon/cachedump.c +++ b/daemon/cachedump.c @@ -802,8 +802,7 @@ print_dp_details(SSL* ssl, struct worker* worker, struct delegpt* dp) { char buf[257]; struct delegpt_addr* a; - int lame, dlame, rlame, rto, edns_vs, to; - int entry_ttl; + int lame, dlame, rlame, rto, edns_vs, to, delay, entry_ttl; struct rtt_info ri; uint8_t edns_lame_known; for(a = dp->target_list; a; a = a->next_target) { @@ -816,7 +815,7 @@ print_dp_details(SSL* ssl, struct worker* worker, struct delegpt* dp) } /* lookup in infra cache */ entry_ttl = infra_get_host_rto(worker->env.infra_cache, - &a->addr, a->addrlen, &ri, *worker->env.now); + &a->addr, a->addrlen, &ri, &delay, *worker->env.now); if(entry_ttl == -1) { if(!ssl_printf(ssl, "not in infra cache.\n")) return; @@ -840,6 +839,9 @@ print_dp_details(SSL* ssl, struct worker* worker, struct delegpt* dp) rlame?"NoAuthButRecursive ":"", rto, entry_ttl, ri.srtt, ri.rttvar, rtt_notimeout(&ri))) return; + if(delay) + if(!ssl_printf(ssl, ", probedelay %d", delay)) + return; if(infra_host(worker->env.infra_cache, &a->addr, a->addrlen, *worker->env.now, &edns_vs, &edns_lame_known, &to)) { if(edns_vs == -1) { diff --git a/daemon/remote.c b/daemon/remote.c index 
66e89650a..f78e1d632 100644 --- a/daemon/remote.c +++ b/daemon/remote.c @@ -1572,10 +1572,11 @@ dump_infra_host(struct lruhash_entry* e, void* arg) return; } if(!ssl_printf(a->ssl, "%s ttl %d ping %d var %d rtt %d rto %d " - "ednsknown %d edns %d\n", + "ednsknown %d edns %d delay %d\n", ip_str, (int)(d->ttl - a->now), d->rtt.srtt, d->rtt.rttvar, rtt_notimeout(&d->rtt), d->rtt.rto, - (int)d->edns_lame_known, (int)d->edns_version)) + (int)d->edns_lame_known, (int)d->edns_version, + (int)(a->now<d->probedelay?d->probedelay-a->now:0))) return; if(d->lameness) lruhash_traverse(d->lameness, 0, &dump_infra_lame, arg); diff --git a/doc/Changelog b/doc/Changelog index 2e4e6d002..a94467cc1 100644 --- a/doc/Changelog +++ b/doc/Changelog @@ -1,6 +1,10 @@ 26 October 2010: Wouter - dump_infra and flush_infra commands for unbound-control. - no timeout backoff if meanwhile a query succeeded. + - Change of timeout code. No more lost and backoff in blockage. + At 12sec timeout (and at least 2x lost before) one probe per IP + is allowed only. At 120sec, the IP is blocked. After 15min, a + 120sec entry has a single retry packet. 25 October 2010: Wouter - Configure errors if ldns is not found. diff --git a/services/cache/infra.c b/services/cache/infra.c index 4ac51f69b..9e1e3a81c 100644 --- a/services/cache/infra.c +++ b/services/cache/infra.c @@ -49,6 +49,9 @@ #include "util/config_file.h" #include "iterator/iterator.h" +/** Timeout when only a single probe query per IP is allowed. 
*/ +#define PROBE_MAXRTO 12000 /* in msec */ + size_t infra_host_sizefunc(void* k, void* ATTR_UNUSED(d)) { @@ -213,6 +216,7 @@ host_entry_init(struct infra_cache* infra, struct lruhash_entry* e, rtt_init(&data->rtt); data->edns_version = 0; data->edns_lame_known = 0; + data->probedelay = 0; } /** @@ -257,6 +261,7 @@ infra_host(struct infra_cache* infra, struct sockaddr_storage* addr, struct lruhash_entry* e = infra_lookup_host_nottl(infra, addr, addrlen, 0); struct infra_host_data* data; + int wr = 0; if(e && ((struct infra_host_data*)e->data)->ttl < timenow) { /* it expired, try to reuse existing entry */ lock_rw_unlock(&e->lock); @@ -266,6 +271,7 @@ infra_host(struct infra_cache* infra, struct sockaddr_storage* addr, /* re-initialise */ /* do not touch lameness, it may be valid still */ host_entry_init(infra, e, timenow); + wr = 1; } } if(!e) { @@ -284,6 +290,22 @@ infra_host(struct infra_cache* infra, struct sockaddr_storage* addr, *to = rtt_timeout(&data->rtt); *edns_vs = data->edns_version; *edns_lame_known = data->edns_lame_known; + if(*to >= PROBE_MAXRTO && rtt_notimeout(&data->rtt)*4 <= *to) { + /* delay other queries, this is the probe query */ + if(!wr) { + lock_rw_unlock(&e->lock); + e = infra_lookup_host_nottl(infra, addr, addrlen, 1); + if(!e) { /* flushed from cache real fast, no use to + allocate just for the probedelay */ + return 1; + } + data = (struct infra_host_data*)e->data; + } + /* add 999 to round up the timeout value from msec to sec, + * then add a whole second so it is certain that this probe + * has timed out before the next is allowed */ + data->probedelay = timenow + ((*to)+1999)/1000; + } lock_rw_unlock(&e->lock); return 1; } @@ -498,6 +520,7 @@ infra_rtt_update(struct infra_cache* infra, rtt_lost(&data->rtt, orig_rtt); } else { rtt_update(&data->rtt, roundtrip); + data->probedelay = 0; } if(data->rtt.rto > 0) rto = data->rtt.rto; @@ -510,7 +533,7 @@ infra_rtt_update(struct infra_cache* infra, int infra_get_host_rto(struct 
infra_cache* infra, struct sockaddr_storage* addr, socklen_t addrlen, - struct rtt_info* rtt, uint32_t timenow) + struct rtt_info* rtt, int* delay, uint32_t timenow) { struct lruhash_entry* e = infra_lookup_host_nottl(infra, addr, addrlen, 0); @@ -521,6 +544,9 @@ int infra_get_host_rto(struct infra_cache* infra, if(data->ttl >= timenow) { ttl = (int)(data->ttl - timenow); memmove(rtt, &data->rtt, sizeof(*rtt)); + if(timenow < data->probedelay) + *delay = (int)(data->probedelay - timenow); + else *delay = 0; } lock_rw_unlock(&e->lock); return ttl; @@ -570,6 +596,10 @@ infra_get_lame_rtt(struct infra_cache* infra, return 0; host = (struct infra_host_data*)e->data; *rtt = rtt_unclamped(&host->rtt); + if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay + && rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) + /* single probe for this domain, and we are not probing */ + *rtt = USEFUL_SERVER_TOP_TIMEOUT; /* check lameness first, if so, ttl on host does not matter anymore */ if(infra_lookup_lame(host, name, namelen, timenow, &dlm, &rlm, &alm, &olm)) { @@ -604,6 +634,13 @@ infra_get_lame_rtt(struct infra_cache* infra, *dnsseclame = 0; *reclame = 0; if(timenow > host->ttl) { + /* expired entry */ + /* see if this can be a re-probe of an unresponsive server */ + if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT) { + *rtt = USEFUL_SERVER_TOP_TIMEOUT-1; + lock_rw_unlock(&e->lock); + return 1; + } lock_rw_unlock(&e->lock); return 0; } diff --git a/services/cache/infra.h b/services/cache/infra.h index 9c203ee4d..376e1ae50 100644 --- a/services/cache/infra.h +++ b/services/cache/infra.h @@ -64,6 +64,8 @@ struct infra_host_key { struct infra_host_data { /** TTL value for this entry. absolute time. */ uint32_t ttl; + /** time in seconds (absolute) when probing re-commences, 0 disabled */ + uint32_t probedelay; /** round trip times for timeout calculation */ struct rtt_info rtt; /** Names of the zones that are lame. NULL=no lame zones. 
*/ @@ -173,6 +175,8 @@ struct infra_host_data* infra_lookup_host(struct infra_cache* infra, * Find host information to send a packet. Creates new entry if not found. * Lameness is empty. EDNS is 0 (try with first), and rtt is returned for * the first message to it. + * Use this to send a packet only, because it also locks out others when + * probing is restricted. * @param infra: infrastructure cache. * @param addr: host address. * @param addrlen: length of addr. @@ -265,6 +269,7 @@ int infra_edns_update(struct infra_cache* infra, /** * Get Lameness information and average RTT if host is in the cache. + * This information is to be used for server selection. * @param infra: infrastructure cache. * @param addr: host address. * @param addrlen: length of addr. @@ -291,12 +296,13 @@ int infra_get_lame_rtt(struct infra_cache* infra, * @param addr: host address. * @param addrlen: length of addr. * @param rtt: the rtt_info is copied into here (caller alloced return struct). + * @param delay: probe delay (if any). * @param timenow: what time it is now. * @return TTL the infra host element is valid for. If -1: not found in cache. */ int infra_get_host_rto(struct infra_cache* infra, struct sockaddr_storage* addr, socklen_t addrlen, - struct rtt_info* rtt, uint32_t timenow); + struct rtt_info* rtt, int* delay, uint32_t timenow); /** * Get memory used by the infra cache. -- 2.47.2