From 2bdb094f7bf795704d4dbb6cc45e74d6da67c6b9 Mon Sep 17 00:00:00 2001 From: Wouter Wijngaards Date: Thu, 21 Oct 2010 15:11:39 +0000 Subject: [PATCH] - Fix bug where fallback_tcp causes wrong roundtrip and edns observation to be noted in cache. Fix bug where EDNSprobe halted exponential backoff if EDNS status unknown. - new unresponsive host method, exponentially increasing block backoff. - iana portlist updated. git-svn-id: file:///svn/unbound/trunk@2303 be551aaa-1e26-0410-a405-d3ace91eadb9 --- daemon/cachedump.c | 35 ++++++++++++++++++++++----- doc/Changelog | 7 ++++++ iterator/iter_utils.c | 6 +---- iterator/iterator.h | 2 ++ services/cache/infra.c | 49 +++++++++++++++++++++++++++++++------- services/cache/infra.h | 18 ++++++++++++++ services/outside_network.c | 18 +++++++------- util/iana_ports.inc | 2 ++ util/rtt.c | 5 ++++ util/rtt.h | 7 ++++++ 10 files changed, 121 insertions(+), 28 deletions(-) diff --git a/daemon/cachedump.c b/daemon/cachedump.c index ee88b11c2..5a420db60 100644 --- a/daemon/cachedump.c +++ b/daemon/cachedump.c @@ -803,6 +803,7 @@ print_dp_details(SSL* ssl, struct worker* worker, struct delegpt* dp) char buf[257]; struct delegpt_addr* a; int lame, dlame, rlame, rtt, edns_vs, to, lost; + int entry_ttl, clean_rtt, backoff; uint8_t edns_lame_known; for(a = dp->target_list; a; a = a->next_target) { addr_to_str(&a->addr, a->addrlen, buf, sizeof(buf)); @@ -813,6 +814,20 @@ print_dp_details(SSL* ssl, struct worker* worker, struct delegpt* dp) return; } /* lookup in infra cache */ + entry_ttl = infra_get_host_rto(worker->env.infra_cache, + &a->addr, a->addrlen, &clean_rtt, &rtt, &backoff, + *worker->env.now); + if(entry_ttl == -1) { + if(!ssl_printf(ssl, "not in infra cache.\n")) + return; + continue; /* skip stuff not in infra cache */ + } else if(entry_ttl == -2) { + if(!ssl_printf(ssl, "not in infra cache " + "(backoff %d).\n", backoff)) + return; + continue; /* skip stuff not in infra cache */ + } + /* uses type_A because most often looked up, but other * lameness won't be reported then */ if(!infra_get_lame_rtt(worker->env.infra_cache, @@ -823,20 +838,28 @@ print_dp_details(SSL* ssl, struct worker* worker, struct delegpt* dp) return; continue; /* skip stuff not in infra cache */ } - if(!ssl_printf(ssl, "%s%s%s%srtt %d msec, %d lost. ", + if(!ssl_printf(ssl, "%s%s%s%srtt %d msec, %d lost, ttl %d", lame?"LAME ":"", dlame?"NoDNSSEC ":"", a->lame?"AddrWasParentSide ":"", - rlame?"NoAuthButRecursive ":"", rtt, lost)) + rlame?"NoAuthButRecursive ":"", rtt, lost, entry_ttl)) return; + if(rtt != clean_rtt && clean_rtt != 376 /* unknown */) { + if(!ssl_printf(ssl, ", ping %d", clean_rtt)) + return; + } + if(backoff != INFRA_BACKOFF_INITIAL) { + if(!ssl_printf(ssl, ", backoff %d", backoff)) + return; + } if(infra_host(worker->env.infra_cache, &a->addr, a->addrlen, *worker->env.now, &edns_vs, &edns_lame_known, &to)) { if(edns_vs == -1) { - if(!ssl_printf(ssl, "noEDNS%s.", - edns_lame_known?" probed":"")) + if(!ssl_printf(ssl, ", noEDNS%s.", + edns_lame_known?" probed":" assumed")) return; } else { - if(!ssl_printf(ssl, "EDNS %d%s.", - edns_vs, edns_lame_known?" probed":"")) + if(!ssl_printf(ssl, ", EDNS %d%s.", edns_vs, + edns_lame_known?" probed":" assumed")) return; } } diff --git a/doc/Changelog b/doc/Changelog index 09c8bd0c4..98fad2e46 100644 --- a/doc/Changelog +++ b/doc/Changelog @@ -1,3 +1,10 @@ +21 October 2010: Wouter + - Fix bug where fallback_tcp causes wrong roundtrip and edns + observation to be noted in cache. Fix bug where EDNSprobe halted + exponential backoff if EDNS status unknown. + - new unresponsive host method, exponentially increasing block backoff. + - iana portlist updated. + 20 October 2010: Wouter - interface automatic works for some people with ip6 disabled. Therefore the error check is removed, so they can use the option. diff --git a/iterator/iter_utils.c b/iterator/iter_utils.c index ca1781a1b..7bb1c1244 100644 --- a/iterator/iter_utils.c +++ b/iterator/iter_utils.c @@ -206,11 +206,9 @@ iter_filter_unsuitable(struct iter_env* iter_env, struct module_env* env, a->lame?" ADDR_LAME":""); if(lame) return -1; /* server is lame */ - else if(rtt >= USEFUL_SERVER_TOP_TIMEOUT && - lost >= USEFUL_SERVER_MAX_LOST) { + else if(rtt >= USEFUL_SERVER_TOP_TIMEOUT) /* server is unresponsive */ return USEFUL_SERVER_TOP_TIMEOUT; - } /* select remainder from worst to best */ else if(reclame) return rtt+USEFUL_SERVER_TOP_TIMEOUT*3; /* nonpref */ @@ -218,8 +216,6 @@ iter_filter_unsuitable(struct iter_env* iter_env, struct module_env* env, return rtt+USEFUL_SERVER_TOP_TIMEOUT*2; /* nonpref */ else if(a->lame) return rtt+USEFUL_SERVER_TOP_TIMEOUT+1; /* nonpref */ - else if(rtt >= USEFUL_SERVER_TOP_TIMEOUT) /* not blacklisted*/ - return USEFUL_SERVER_TOP_TIMEOUT+1; else return rtt; } /* no server information present */ diff --git a/iterator/iterator.h b/iterator/iterator.h index a2d3eab47..24fa6a3b5 100644 --- a/iterator/iterator.h +++ b/iterator/iterator.h @@ -75,6 +75,8 @@ struct iter_priv; * Chosen so that the UNKNOWN_SERVER_NICENESS falls within the band of a * fast server, this causes server exploration as a side benefit. msec. */ #define RTT_BAND 400 +/** Start value for blacklisting a host, 2*USEFUL_SERVER_TOP_TIMEOUT in sec */ +#define INFRA_BACKOFF_INITIAL 240 /** * Global state for the iterator. diff --git a/services/cache/infra.c b/services/cache/infra.c index 2da7c3996..a2cd5996f 100644 --- a/services/cache/infra.c +++ b/services/cache/infra.c @@ -190,7 +190,7 @@ infra_lookup_host(struct infra_cache* infra, return data; } -/** init the host elements (not lame elems) */ +/** init the host elements (not lame elems, not backoff) */ static void host_entry_init(struct infra_cache* infra, struct lruhash_entry* e, uint32_t timenow) @@ -233,6 +233,7 @@ new_host_entry(struct infra_cache* infra, struct sockaddr_storage* addr, key->addrlen = addrlen; memcpy(&key->addr, addr, addrlen); data->lameness = NULL; + data->backoff = INFRA_BACKOFF_INITIAL; host_entry_init(infra, &key->entry, tm); return &key->entry; } @@ -270,14 +271,6 @@ infra_host(struct infra_cache* infra, struct sockaddr_storage* addr, /* use existing entry */ data = (struct infra_host_data*)e->data; *to = rtt_timeout(&data->rtt); - if(*to >= USEFUL_SERVER_TOP_TIMEOUT && - data->num_timeouts < USEFUL_SERVER_MAX_LOST) - /* use smaller timeout, backoff does not work - * The server seems to still reply but sporadically. - * Perhaps it has rate-limited the traffic, or it - * drops particular queries (AAAA). ignore timeouts, - * and use the jostle timeout for rtt estimate. */ - *to = (int)infra->jostle; *edns_vs = data->edns_version; *edns_lame_known = data->edns_lame_known; lock_rw_unlock(&e->lock); @@ -491,11 +484,29 @@ infra_rtt_update(struct infra_cache* infra, /* have an entry, update the rtt */ data = (struct infra_host_data*)e->data; if(roundtrip == -1) { + int o = rtt_timeout(&data->rtt); rtt_lost(&data->rtt, orig_rtt); + if(rtt_timeout(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT + && o < USEFUL_SERVER_TOP_TIMEOUT) { + /* backoff the blacklisted timeout */ + log_addr(VERB_ALGO, "backoff for", addr, addrlen); + data->backoff *= 2; + if(data->backoff >= 24*3600) + data->backoff = 24*3600; + verbose(VERB_ALGO, "backoff to %d", data->backoff); + /* increase the infra item TTL */ + data->ttl = timenow + data->backoff; + } + if(data->num_timeouts<255) data->num_timeouts++; } else { rtt_update(&data->rtt, roundtrip); + /* un-backoff the element */ + if(data->backoff > (uint32_t)infra->host_ttl*2) + data->backoff = (uint32_t)infra->host_ttl*2; + else data->backoff = INFRA_BACKOFF_INITIAL; + data->num_timeouts = 0; } if(data->rtt.rto > 0) @@ -507,6 +518,26 @@ infra_rtt_update(struct infra_cache* infra, return rto; } +int infra_get_host_rto(struct infra_cache* infra, + struct sockaddr_storage* addr, socklen_t addrlen, + int* rtt, int* rto, int* backoff, uint32_t timenow) +{ + struct lruhash_entry* e = infra_lookup_host_nottl(infra, addr, + addrlen, 0); + struct infra_host_data* data; + int ttl = -2; + if(!e) return -1; + data = (struct infra_host_data*)e->data; + *backoff = (int)data->backoff; + if(data->ttl >= timenow) { + ttl = (int)(data->ttl - timenow); + *rtt = rtt_notimeout(&data->rtt); + *rto = rtt_unclamped(&data->rtt); + } + lock_rw_unlock(&e->lock); + return ttl; +} + int infra_edns_update(struct infra_cache* infra, struct sockaddr_storage* addr, socklen_t addrlen, diff --git a/services/cache/infra.h b/services/cache/infra.h index e70ed968f..956edd23f 100644 --- a/services/cache/infra.h +++ b/services/cache/infra.h @@ -64,6 +64,8 @@ struct infra_host_key { struct infra_host_data { /** TTL value for this entry. absolute time. */ uint32_t ttl; + /** backoff time if blacklisted unresponsive. in seconds. */ + uint32_t backoff; /** round trip times for timeout calculation */ struct rtt_info rtt; /** Names of the zones that are lame. NULL=no lame zones. */ @@ -286,6 +288,22 @@ int infra_get_lame_rtt(struct infra_cache* infra, int* lame, int* dnsseclame, int* reclame, int* rtt, int* lost, uint32_t timenow); +/** + * Get additional (debug) info on timing. + * @param infra: infra cache. + * @param addr: host address. + * @param addrlen: length of addr. + * @param rtt: the clean rtt time (of working replies). + * @param rto: the rtt with timeouts applied. (rtt as returned by other funcs). + * @param backoff: the backoff time for blacked entries. + * @param timenow: what time it is now. + * @return TTL the infra host element is valid for. If -1: not found in cache. + * If -2: found in cache, but TTL was not valid, only backoff is filled. + */ +int infra_get_host_rto(struct infra_cache* infra, + struct sockaddr_storage* addr, socklen_t addrlen, + int* rtt, int* rto, int* backoff, uint32_t timenow); + /** * Get memory used by the infra cache. * @param infra: infrastructure cache. diff --git a/services/outside_network.c b/services/outside_network.c index 151abf1d2..bddbb5608 100644 --- a/services/outside_network.c +++ b/services/outside_network.c @@ -1305,6 +1305,7 @@ serviced_udp_send(struct serviced_query* sq, ldns_buffer* buff) if(!infra_host(sq->outnet->infra, &sq->addr, sq->addrlen, now, &vs, &edns_lame_known, &rtt)) return 0; + sq->last_rtt = rtt; if(sq->status == serviced_initial) { if(edns_lame_known == 0 && rtt > 5000 && rtt < 10001) { /* perform EDNS lame probe - check if server is @@ -1321,7 +1322,6 @@ serviced_udp_send(struct serviced_query* sq, ldns_buffer* buff) } serviced_encode(sq, buff, sq->status == serviced_query_UDP_EDNS); sq->last_sent_time = *sq->outnet->now_tv; - sq->last_rtt = rtt; sq->edns_lame_known = (int)edns_lame_known; verbose(VERB_ALGO, "serviced query UDP timeout=%d msec", rtt); sq->pending = pending_udp_query(sq->outnet, buff, &sq->addr, @@ -1584,7 +1584,8 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, serviced_callbacks(sq, error, c, rep); return 0; } - if(sq->status == serviced_query_UDP_EDNS + if(!fallback_tcp) { + if(sq->status == serviced_query_UDP_EDNS && (LDNS_RCODE_WIRE(ldns_buffer_begin(c->buffer)) == LDNS_RCODE_FORMERR || LDNS_RCODE_WIRE( ldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOTIMPL)) { @@ -1595,7 +1596,7 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, serviced_callbacks(sq, NETEVENT_CLOSED, c, rep); } return 0; - } else if(sq->status == serviced_query_PROBE_EDNS) { + } else if(sq->status == serviced_query_PROBE_EDNS) { /* probe without EDNS succeeds, so we conclude that this * host likely has EDNS packets dropped */ log_addr(VERB_DETAIL, "timeouts, concluded that connection to " @@ -1607,7 +1608,7 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, log_err("Out of memory caching no edns for host"); } sq->status = serviced_query_UDP; - } else if(sq->status == serviced_query_UDP_EDNS && + } else if(sq->status == serviced_query_UDP_EDNS && !sq->edns_lame_known) { /* now we know that edns queries received answers store that */ if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen, @@ -1615,7 +1616,7 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, log_err("Out of memory caching edns works"); } sq->edns_lame_known = 1; - } else if(sq->status == serviced_query_UDP_EDNS_fallback && + } else if(sq->status == serviced_query_UDP_EDNS_fallback && !sq->edns_lame_known && (LDNS_RCODE_WIRE( ldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOERROR || LDNS_RCODE_WIRE(ldns_buffer_begin(c->buffer)) == @@ -1630,8 +1631,8 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, log_err("Out of memory caching no edns for host"); } sq->status = serviced_query_UDP; - } - if(now.tv_sec > sq->last_sent_time.tv_sec || + } + if(now.tv_sec > sq->last_sent_time.tv_sec || (now.tv_sec == sq->last_sent_time.tv_sec && now.tv_usec > sq->last_sent_time.tv_usec)) { /* convert from microseconds to milliseconds */ @@ -1642,7 +1643,8 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, if(!infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen, roundtime, sq->last_rtt, (uint32_t)now.tv_sec)) log_err("out of memory noting rtt."); - } + } + } /* end of if_!fallback_tcp */ /* perform TC flag check and TCP fallback after updating our * cache entries for EDNS status and RTT times */ if(LDNS_TC_WIRE(ldns_buffer_begin(c->buffer)) || fallback_tcp) { diff --git a/util/iana_ports.inc b/util/iana_ports.inc index 07956a67d..20b0242d9 100644 --- a/util/iana_ports.inc +++ b/util/iana_ports.inc @@ -4079,6 +4079,8 @@ 5100, 5101, 5102, +5104, +5105, 5111, 5112, 5113, diff --git a/util/rtt.c b/util/rtt.c index efd7925a5..af21cc39f 100644 --- a/util/rtt.c +++ b/util/rtt.c @@ -109,3 +109,8 @@ rtt_lost(struct rtt_info* rtt, int orig) rtt->rto = RTT_MAX_TIMEOUT; } } + +int rtt_notimeout(const struct rtt_info* rtt) +{ + return calc_rto(rtt); +} diff --git a/util/rtt.h b/util/rtt.h index 9dc6c04e4..1af5484c9 100644 --- a/util/rtt.h +++ b/util/rtt.h @@ -81,6 +81,13 @@ int rtt_timeout(const struct rtt_info* rtt); */ int rtt_unclamped(const struct rtt_info* rtt); +/** + * RTT for valid responses. Without timeouts. + * @param rtt: round trip statistics structure. + * @return: value in msec. + */ +int rtt_notimeout(const struct rtt_info* rtt); + /** * Update the statistics with a new roundtrip estimate observation. * @param rtt: round trip statistics structure. -- 2.47.2