From: Wouter Wijngaards Date: Fri, 10 Feb 2012 12:17:25 +0000 (+0000) Subject: - Fix timeouts to keep track of query type, A, AAAA and other, if X-Git-Tag: release-1.4.17rc1~60 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=09b9ea04a3829b0e0e04071a7bb1d7f77c60686a;p=thirdparty%2Funbound.git - Fix timeouts to keep track of query type, A, AAAA and other, if another has caused timeout blacklist, different type can still probe. git-svn-id: file:///svn/unbound/trunk@2613 be551aaa-1e26-0410-a405-d3ace91eadb9 --- diff --git a/doc/Changelog b/doc/Changelog index 812612dc4..a92e9495a 100644 --- a/doc/Changelog +++ b/doc/Changelog @@ -1,5 +1,7 @@ 10 February 2012: Wouter - Slightly smaller critical region in one case in infra cache. + - Fix timeouts to keep track of query type, A, AAAA and other, if + another has caused timeout blacklist, different type can still probe. 9 February 2012: Wouter - Fix AHX_BROKEN_MEMCMP for autoheader mess up of #undef in config.h. diff --git a/services/cache/infra.c b/services/cache/infra.c index e6772baed..9baedd13f 100644 --- a/services/cache/infra.c +++ b/services/cache/infra.c @@ -52,6 +52,11 @@ /** Timeout when only a single probe query per IP is allowed. 
*/ #define PROBE_MAXRTO 12000 /* in msec */ +/** number of timeouts for a type when the domain can be blocked ; + * even if another type has completely rtt maxed it, the different type + * can do this number of packets (until those all timeout too) */ +#define TIMEOUT_COUNT_MAX 3 + size_t infra_sizefunc(void* k, void* ATTR_UNUSED(d)) { @@ -196,6 +201,9 @@ data_entry_init(struct infra_cache* infra, struct lruhash_entry* e, data->rec_lame = 0; data->lame_type_A = 0; data->lame_other = 0; + data->timeout_A = 0; + data->timeout_AAAA = 0; + data->timeout_other = 0; } /** @@ -250,6 +258,9 @@ infra_host(struct infra_cache* infra, struct sockaddr_storage* addr, if(e && ((struct infra_data*)e->data)->ttl < timenow) { /* it expired, try to reuse existing entry */ int old = ((struct infra_data*)e->data)->rtt.rto; + uint8_t tA = ((struct infra_data*)e->data)->timeout_A; + uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA; + uint8_t tother = ((struct infra_data*)e->data)->timeout_other; lock_rw_unlock(&e->lock); e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1); if(e) { @@ -259,9 +270,13 @@ infra_host(struct infra_cache* infra, struct sockaddr_storage* addr, data_entry_init(infra, e, timenow); wr = 1; /* TOP_TIMEOUT remains on reuse */ - if(old >= USEFUL_SERVER_TOP_TIMEOUT) + if(old >= USEFUL_SERVER_TOP_TIMEOUT) { ((struct infra_data*)e->data)->rtt.rto = USEFUL_SERVER_TOP_TIMEOUT; + ((struct infra_data*)e->data)->timeout_A = tA; + ((struct infra_data*)e->data)->timeout_AAAA = tAAAA; + ((struct infra_data*)e->data)->timeout_other = tother; + } } } if(!e) { @@ -358,8 +373,8 @@ infra_update_tcp_works(struct infra_cache* infra, int infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr, - socklen_t addrlen, uint8_t* nm, size_t nmlen, int roundtrip, - int orig_rtt, uint32_t timenow) + socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype, + int roundtrip, int orig_rtt, uint32_t timenow) { struct lruhash_entry* e = infra_lookup_nottl(infra, 
addr, addrlen, nm, nmlen, 1); @@ -377,9 +392,24 @@ infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr, data = (struct infra_data*)e->data; if(roundtrip == -1) { rtt_lost(&data->rtt, orig_rtt); + if(qtype == LDNS_RR_TYPE_A) { + if(data->timeout_A < TIMEOUT_COUNT_MAX) + data->timeout_A++; + } else if(qtype == LDNS_RR_TYPE_AAAA) { + if(data->timeout_AAAA < TIMEOUT_COUNT_MAX) + data->timeout_AAAA++; + } else { + if(data->timeout_other < TIMEOUT_COUNT_MAX) + data->timeout_other++; + } } else { rtt_update(&data->rtt, roundtrip); data->probedelay = 0; + if(qtype == LDNS_RR_TYPE_A) + data->timeout_A = 0; + else if(qtype == LDNS_RR_TYPE_AAAA) + data->timeout_AAAA = 0; + else data->timeout_other = 0; } if(data->rtt.rto > 0) rto = data->rtt.rto; @@ -456,9 +486,23 @@ infra_get_lame_rtt(struct infra_cache* infra, host = (struct infra_data*)e->data; *rtt = rtt_unclamped(&host->rtt); if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay - && rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) + && rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) { /* single probe for this domain, and we are not probing */ - *rtt = USEFUL_SERVER_TOP_TIMEOUT; + /* unless the query type allows a probe to happen */ + if(qtype == LDNS_RR_TYPE_A) { + if(host->timeout_A >= TIMEOUT_COUNT_MAX) + *rtt = USEFUL_SERVER_TOP_TIMEOUT; + else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; + } else if(qtype == LDNS_RR_TYPE_AAAA) { + if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX) + *rtt = USEFUL_SERVER_TOP_TIMEOUT; + else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; + } else { + if(host->timeout_other >= TIMEOUT_COUNT_MAX) + *rtt = USEFUL_SERVER_TOP_TIMEOUT; + else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; + } + } if(timenow > host->ttl) { /* expired entry */ /* see if this can be a re-probe of an unresponsive server */ diff --git a/services/cache/infra.h b/services/cache/infra.h index 3a3508eac..5d15b5eee 100644 --- a/services/cache/infra.h +++ b/services/cache/infra.h @@ -91,6 +91,13 @@ struct infra_data { 
uint8_t lame_type_A; /** the host is lame (not authoritative) for other query types */ uint8_t lame_other; + + /** timeouts counter for type A */ + uint8_t timeout_A; + /** timeouts counter for type AAAA */ + uint8_t timeout_AAAA; + /** timeouts counter for others */ + uint8_t timeout_other; }; /** @@ -195,6 +202,7 @@ int infra_set_lame(struct infra_cache* infra, * @param addrlen: length of addr. * @param name: zone name * @param namelen: zone name length + * @param qtype: query type. * @param roundtrip: estimate of roundtrip time in milliseconds or -1 for * timeout. * @param orig_rtt: original rtt for the query that timed out (roundtrip==-1). @@ -203,7 +211,7 @@ int infra_set_lame(struct infra_cache* infra, * @return: 0 on error. new rto otherwise. */ int infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr, - socklen_t addrlen, uint8_t* name, size_t namelen, + socklen_t addrlen, uint8_t* name, size_t namelen, int qtype, int roundtrip, int orig_rtt, uint32_t timenow); /** diff --git a/services/outside_network.c b/services/outside_network.c index 7fd408e79..3282f60e3 100644 --- a/services/outside_network.c +++ b/services/outside_network.c @@ -1166,7 +1166,7 @@ static struct serviced_query* serviced_create(struct outside_network* outnet, ldns_buffer* buff, int dnssec, int want_dnssec, int tcp_upstream, int ssl_upstream, struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* zone, - size_t zonelen) + size_t zonelen, int qtype) { struct serviced_query* sq = (struct serviced_query*)malloc(sizeof(*sq)); #ifdef UNBOUND_DEBUG @@ -1188,6 +1188,7 @@ serviced_create(struct outside_network* outnet, ldns_buffer* buff, int dnssec, return NULL; } sq->zonelen = zonelen; + sq->qtype = qtype; sq->dnssec = dnssec; sq->want_dnssec = want_dnssec; sq->tcp_upstream = tcp_upstream; @@ -1566,8 +1567,8 @@ serviced_tcp_callback(struct comm_point* c, void* arg, int error, * huge due to system-hibernated and we woke up */ if(roundtime < TCP_AUTH_QUERY_TIMEOUT*1000) { 
if(!infra_rtt_update(sq->outnet->infra, &sq->addr, - sq->addrlen, sq->zone, sq->zonelen, roundtime, - sq->last_rtt, (uint32_t)now.tv_sec)) + sq->addrlen, sq->zone, sq->zonelen, sq->qtype, + roundtime, sq->last_rtt, (uint32_t)now.tv_sec)) log_err("out of memory noting rtt."); } } @@ -1658,7 +1659,7 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, } sq->retry++; if(!(rto=infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen, - sq->zone, sq->zonelen, -1, sq->last_rtt, + sq->zone, sq->zonelen, sq->qtype, -1, sq->last_rtt, (uint32_t)now.tv_sec))) log_err("out of memory in UDP exponential backoff"); if(sq->retry < OUTBOUND_UDP_RETRY) { @@ -1752,8 +1753,8 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, * above this value gives trouble with server selection */ if(roundtime < 60000) { if(!infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen, - sq->zone, sq->zonelen, roundtime, sq->last_rtt, - (uint32_t)now.tv_sec)) + sq->zone, sq->zonelen, sq->qtype, roundtime, + sq->last_rtt, (uint32_t)now.tv_sec)) log_err("out of memory noting rtt."); } } @@ -1814,7 +1815,7 @@ outnet_serviced_query(struct outside_network* outnet, /* make new serviced query entry */ sq = serviced_create(outnet, buff, dnssec, want_dnssec, tcp_upstream, ssl_upstream, addr, addrlen, zone, - zonelen); + zonelen, qtype); if(!sq) { free(cb); return NULL; diff --git a/services/outside_network.h b/services/outside_network.h index bfaab459e..ab18d2406 100644 --- a/services/outside_network.h +++ b/services/outside_network.h @@ -312,6 +312,8 @@ struct serviced_query { uint8_t* zone; /** length of zone name */ size_t zonelen; + /** qtype */ + int qtype; /** current status */ enum serviced_query_status { /** initial status */ diff --git a/testcode/fake_event.c b/testcode/fake_event.c index c6d0ad3fe..d31fa79b0 100644 --- a/testcode/fake_event.c +++ b/testcode/fake_event.c @@ -147,6 +147,7 @@ delete_fake_pending(struct fake_pending* pend) { if(!pend) return; + 
free(pend->zone); ldns_buffer_free(pend->buffer); ldns_pkt_free(pend->pkt); free(pend); @@ -554,7 +555,7 @@ do_infra_rtt(struct replay_runtime* runtime) if(!dp) fatal_exit("cannot parse %s", now->variable); rto = infra_rtt_update(runtime->infra, &now->addr, now->addrlen, ldns_rdf_data(dp), ldns_rdf_size(dp), - atoi(now->string), -1, runtime->now_secs); + LDNS_RR_TYPE_A, atoi(now->string), -1, runtime->now_secs); log_addr(0, "INFRA_RTT for", &now->addr, now->addrlen); log_info("INFRA_RTT(%s roundtrip %d): rto of %d", now->variable, atoi(now->string), rto); @@ -562,6 +563,24 @@ do_infra_rtt(struct replay_runtime* runtime) ldns_rdf_deep_free(dp); } +/** perform exponential backoff on the timeout */ +static void +expon_timeout_backoff(struct replay_runtime* runtime) +{ + struct fake_pending* p = runtime->pending_list; + int rtt, vs; + uint8_t edns_lame_known; + int last_rtt, rto; + if(!p) return; /* no pending packet to backoff */ + if(!infra_host(runtime->infra, &p->addr, p->addrlen, p->zone, + p->zonelen, runtime->now_secs, &vs, &edns_lame_known, &rtt)) + return; + last_rtt = rtt; + rto = infra_rtt_update(runtime->infra, &p->addr, p->addrlen, p->zone, + p->zonelen, p->qtype, -1, last_rtt, runtime->now_secs); + log_info("infra_rtt_update returned rto %d", rto); +} + /** * Advance to the next moment. 
*/ @@ -608,6 +627,7 @@ do_moment_and_advance(struct replay_runtime* runtime) case repevt_timeout: mom = runtime->now; advance_moment(runtime); + expon_timeout_backoff(runtime); fake_pending_callback(runtime, mom, NETEVENT_TIMEOUT); break; case repevt_back_reply: @@ -929,6 +949,7 @@ pending_udp_query(struct outside_network* outnet, ldns_buffer* packet, pend->timeout = timeout/1000; pend->transport = transport_udp; pend->pkt = NULL; + pend->zone = NULL; pend->serviced = 0; pend->runtime = runtime; status = ldns_buffer2pkt_wire(&pend->pkt, packet); @@ -982,6 +1003,7 @@ pending_tcp_query(struct outside_network* outnet, ldns_buffer* packet, pend->timeout = timeout; pend->transport = transport_tcp; pend->pkt = NULL; + pend->zone = NULL; pend->runtime = runtime; pend->serviced = 0; status = ldns_buffer2pkt_wire(&pend->pkt, packet); @@ -1017,9 +1039,8 @@ struct serviced_query* outnet_serviced_query(struct outside_network* outnet, uint16_t flags, int dnssec, int ATTR_UNUSED(want_dnssec), int ATTR_UNUSED(tcp_upstream), int ATTR_UNUSED(ssl_upstream), struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* zone, - size_t ATTR_UNUSED(zonelen), comm_point_callback_t* callback, - void* callback_arg, ldns_buffer* ATTR_UNUSED(buff), - int (*arg_compare)(void*,void*)) + size_t zonelen, comm_point_callback_t* callback, void* callback_arg, + ldns_buffer* ATTR_UNUSED(buff), int (*arg_compare)(void*,void*)) { struct replay_runtime* runtime = (struct replay_runtime*)outnet->base; struct fake_pending* pend = (struct fake_pending*)calloc(1, @@ -1062,6 +1083,10 @@ struct serviced_query* outnet_serviced_query(struct outside_network* outnet, } memcpy(&pend->addr, addr, addrlen); pend->addrlen = addrlen; + pend->zone = memdup(zone, zonelen); + pend->zonelen = zonelen; + pend->qtype = qtype; + log_assert(pend->zone); pend->callback = callback; pend->cb_arg = callback_arg; pend->timeout = UDP_AUTH_QUERY_TIMEOUT; diff --git a/testcode/replay.h b/testcode/replay.h index 96814ed2d..049db4e80 
100644 --- a/testcode/replay.h +++ b/testcode/replay.h @@ -323,6 +323,12 @@ struct fake_pending { struct sockaddr_storage addr; /** len of addr */ socklen_t addrlen; + /** zone name, uncompressed wire format (as used when sent) */ + uint8_t* zone; + /** length of zone name */ + size_t zonelen; + /** qtype */ + int qtype; /** The callback function to call when answer arrives (or timeout) */ comm_point_callback_t* callback; /** callback user argument */ diff --git a/testcode/unitmain.c b/testcode/unitmain.c index df42466c8..f381b0b03 100644 --- a/testcode/unitmain.c +++ b/testcode/unitmain.c @@ -445,7 +445,7 @@ infra_test(void) &vs, &edns_lame, &to) ); unit_assert( vs == 0 && to == init && edns_lame == 0 ); - unit_assert( infra_rtt_update(slab, &one, onelen, zone, zonelen, -1, init, now) ); + unit_assert( infra_rtt_update(slab, &one, onelen, zone, zonelen, LDNS_RR_TYPE_A, -1, init, now) ); unit_assert( infra_host(slab, &one, onelen, zone, zonelen, now, &vs, &edns_lame, &to) ); unit_assert( vs == 0 && to == init*2 && edns_lame == 0 ); diff --git a/testdata/iter_timeout_ra_aaaa.rpl b/testdata/iter_timeout_ra_aaaa.rpl new file mode 100644 index 000000000..2815d4c5a --- /dev/null +++ b/testdata/iter_timeout_ra_aaaa.rpl @@ -0,0 +1,244 @@ +; config options +server: + target-fetch-policy: "0 0 0 0 0" + +stub-zone: + name: "." + stub-addr: 193.0.14.129 # K.ROOT-SERVERS.NET. +CONFIG_END + +SCENARIO_BEGIN Test iterator with timeouts on reclame AAAA dropping server + +; K.ROOT-SERVERS.NET. +RANGE_BEGIN 0 100 + ADDRESS 193.0.14.129 +ENTRY_BEGIN +MATCH opcode qtype qname +ADJUST copy_id +REPLY QR NOERROR +SECTION QUESTION +. IN NS +SECTION ANSWER +. IN NS K.ROOT-SERVERS.NET. +SECTION ADDITIONAL +K.ROOT-SERVERS.NET. IN A 193.0.14.129 +ENTRY_END + +ENTRY_BEGIN +MATCH opcode subdomain +ADJUST copy_id copy_query +REPLY QR NOERROR +SECTION QUESTION +com. IN A +SECTION AUTHORITY +com. IN NS a.gtld-servers.net. +SECTION ADDITIONAL +a.gtld-servers.net. 
IN A 192.5.6.30 +ENTRY_END + +ENTRY_BEGIN +MATCH opcode qtype qname +ADJUST copy_id +REPLY QR AA NOERROR +SECTION QUESTION +ns.example.net. IN A +SECTION ANSWER +ns.example.net. IN A 1.2.3.4 +ENTRY_END + +ENTRY_BEGIN +MATCH opcode qtype qname +ADJUST copy_id +REPLY QR AA NOERROR +SECTION QUESTION +ns.example.net. IN AAAA +SECTION ANSWER +SECTION AUTHORITY +. IN SOA a. b. 1 2 3 4 5 +ENTRY_END + +ENTRY_BEGIN +MATCH opcode qtype qname +ADJUST copy_id +REPLY QR NOERROR +SECTION QUESTION +a.gtld-servers.net. IN AAAA +SECTION ANSWER +SECTION AUTHORITY +. IN SOA a. b. 1 2 3 4 5 +ENTRY_END + +RANGE_END + +; a.gtld-servers.net. +RANGE_BEGIN 0 100 + ADDRESS 192.5.6.30 +ENTRY_BEGIN +MATCH opcode qtype qname +ADJUST copy_id +REPLY QR NOERROR +SECTION QUESTION +com. IN NS +SECTION ANSWER +com. IN NS a.gtld-servers.net. +SECTION ADDITIONAL +a.gtld-servers.net. IN A 192.5.6.30 +ENTRY_END + +ENTRY_BEGIN +MATCH opcode subdomain +ADJUST copy_id copy_query +REPLY QR NOERROR +SECTION QUESTION +example.com. IN A +SECTION AUTHORITY +example.com. 280 IN NS ns.example.net. +SECTION ADDITIONAL +ns.example.net. IN A 1.2.3.4 +ENTRY_END +RANGE_END + +; ns.example.com. +; This server is REC_LAME +RANGE_BEGIN 0 100 + ADDRESS 1.2.3.4 +ENTRY_BEGIN +MATCH opcode qtype qname +ADJUST copy_id +REPLY QR RA NOERROR +SECTION QUESTION +example.com. IN NS +SECTION ANSWER +example.com. 280 IN NS ns.example.net. +ENTRY_END + +ENTRY_BEGIN +MATCH opcode qtype qname +ADJUST copy_id +REPLY QR RA NOERROR +SECTION QUESTION +www.example.com. IN A +SECTION ANSWER +www.example.com. 10 IN A 10.20.30.40 +SECTION AUTHORITY +example.com. 280 IN NS ns.example.net. +ENTRY_END +RANGE_END + +STEP 1 QUERY +ENTRY_BEGIN +REPLY RD +SECTION QUESTION +www.example.com. IN A +ENTRY_END + +; recursion happens here. +STEP 10 CHECK_ANSWER +ENTRY_BEGIN +MATCH all +REPLY QR RD RA NOERROR +SECTION QUESTION +www.example.com. IN A +SECTION ANSWER +www.example.com. IN A 10.20.30.40 +SECTION AUTHORITY +example.com. IN NS ns.example.net. 
+ENTRY_END + +; query for (dropped) AAAA record. +STEP 20 QUERY +ENTRY_BEGIN +REPLY RD +SECTION QUESTION +www.example.com. IN AAAA +ENTRY_END + +; the AAAA query times out. +STEP 21 TIMEOUT +STEP 22 TIMEOUT +STEP 23 TIMEOUT +STEP 24 TIMEOUT +STEP 25 TIMEOUT + +; we get servfail, but the AAAA query arrives again (after the servfail times +; out of the cache) +STEP 30 CHECK_ANSWER +ENTRY_BEGIN +MATCH all +REPLY QR RD RA SERVFAIL +SECTION QUESTION +www.example.com. IN AAAA +ENTRY_END +STEP 31 TIME_PASSES ELAPSE 6 +STEP 40 QUERY +ENTRY_BEGIN +REPLY RD +SECTION QUESTION +www.example.com. IN AAAA +ENTRY_END + +; timeouts for AAAA keep happening. +STEP 41 TIMEOUT +STEP 42 CHECK_ANSWER +ENTRY_BEGIN +MATCH all +REPLY QR RD RA SERVFAIL +SECTION QUESTION +www.example.com. IN AAAA +ENTRY_END +STEP 43 TIME_PASSES ELAPSE 12 + +STEP 50 QUERY +ENTRY_BEGIN +REPLY RD +SECTION QUESTION +www.example.com. IN AAAA +ENTRY_END +; fallback queries +STEP 51 TRAFFIC +; and it fails, no parentside entries and so on. +STEP 52 CHECK_ANSWER +ENTRY_BEGIN +MATCH all +REPLY QR RD RA SERVFAIL +SECTION QUESTION +www.example.com. IN AAAA +ENTRY_END +STEP 53 TIME_PASSES ELAPSE 12 + +STEP 60 QUERY +ENTRY_BEGIN +REPLY RD +SECTION QUESTION +www.example.com. IN AAAA +ENTRY_END +STEP 61 TIMEOUT +STEP 62 TRAFFIC +STEP 63 CHECK_ANSWER +ENTRY_BEGIN +MATCH all +REPLY QR RD RA SERVFAIL +SECTION QUESTION +www.example.com. IN AAAA +ENTRY_END + +STEP 70 QUERY +ENTRY_BEGIN +REPLY RD +SECTION QUESTION +www.example.com. IN A +ENTRY_END +STEP 72 TRAFFIC +STEP 73 CHECK_ANSWER +ENTRY_BEGIN +MATCH all +REPLY QR RD RA NOERROR +SECTION QUESTION +www.example.com. IN A +SECTION ANSWER +www.example.com. IN A 10.20.30.40 +SECTION AUTHORITY +example.com. IN NS ns.example.net. +ENTRY_END + +SCENARIO_END