From: Wouter Wijngaards Date: Wed, 17 Oct 2007 19:23:01 +0000 (+0000) Subject: failover to next server. X-Git-Tag: release-0.6~53 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=907ab3d99a3b47683932f7269063a9145c346951;p=thirdparty%2Funbound.git failover to next server. git-svn-id: file:///svn/unbound/trunk@691 be551aaa-1e26-0410-a405-d3ace91eadb9 --- diff --git a/doc/Changelog b/doc/Changelog index 7fbfded6b..2114bc3be 100644 --- a/doc/Changelog +++ b/doc/Changelog @@ -17,6 +17,7 @@ - fixup removal of nonsecure items from the additional. - reduced timeout values to more realistic, 376 msec (262 msec has 90% of roundtrip times, 512 msec has 99% of roundtrip times.) + - server selection failover to next server after timeout (376 msec). 16 October 2007: Wouter - no malloc in log_hex. diff --git a/iterator/iter_delegpt.c b/iterator/iter_delegpt.c index d63eca301..4a8870250 100644 --- a/iterator/iter_delegpt.c +++ b/iterator/iter_delegpt.c @@ -152,6 +152,7 @@ delegpt_add_addr(struct delegpt* dp, struct region* region, dp->usable_list = a; memcpy(&a->addr, addr, addrlen); a->addrlen = addrlen; + a->attempts = 0; return 1; } diff --git a/iterator/iter_delegpt.h b/iterator/iter_delegpt.h index 30cd4da68..6c4170028 100644 --- a/iterator/iter_delegpt.h +++ b/iterator/iter_delegpt.h @@ -104,6 +104,8 @@ struct delegpt_addr { struct sockaddr_storage addr; /** length of addr */ socklen_t addrlen; + /** number of attempts for this addr */ + int attempts; }; /** diff --git a/iterator/iter_utils.c b/iterator/iter_utils.c index 8c0a7bb0e..b07711a6b 100644 --- a/iterator/iter_utils.c +++ b/iterator/iter_utils.c @@ -214,6 +214,8 @@ iter_server_selection(struct iter_env* iter_env, return NULL; if(num == 1) { a = dp->result_list; + if(++a->attempts < OUTBOUND_MSG_RETRY) + return a; dp->result_list = a->next_result; return a; } @@ -231,6 +233,8 @@ iter_server_selection(struct iter_env* iter_env, } if(!a) /* robustness */ return NULL; + if(++a->attempts < OUTBOUND_MSG_RETRY) + return a; /* remove it from the delegation point result list */ if(prev) prev->next_result = a->next_result; diff --git a/iterator/iterator.h b/iterator/iterator.h index 817c00654..16ddbb45a 100644 --- a/iterator/iterator.h +++ b/iterator/iterator.h @@ -64,6 +64,8 @@ struct iter_prep_list; * Equals RTT_MAX_TIMEOUT */ #define USEFUL_SERVER_TOP_TIMEOUT 120000 +/** number of retries on outgoing queries */ +#define OUTBOUND_MSG_RETRY 4 /** * Global state for the iterator. diff --git a/services/cache/infra.c b/services/cache/infra.c index 034e3b298..5b75e3d3b 100644 --- a/services/cache/infra.c +++ b/services/cache/infra.c @@ -401,6 +401,23 @@ infra_set_lame(struct infra_cache* infra, return 1; } +void +infra_update_tcp_works(struct infra_cache* infra, + struct sockaddr_storage* addr, socklen_t addrlen) +{ + struct lruhash_entry* e = infra_lookup_host_nottl(infra, addr, + addrlen, 1); + struct infra_host_data* data; + if(!e) + return; /* doesn't exist */ + data = (struct infra_host_data*)e->data; + if(data->rtt.rto >= RTT_MAX_TIMEOUT) + /* do not disqualify this server altogether, it is better + * than nothing */ + data->rtt.rto = RTT_MAX_TIMEOUT-1; + lock_rw_unlock(&e->lock); +} + int infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr, socklen_t addrlen, @@ -410,6 +427,7 @@ infra_rtt_update(struct infra_cache* infra, addrlen, 1); struct infra_host_data* data; int needtoinsert = 0; + int rto = 1; if(!e) { if(!(e = new_host_entry(infra, addr, addrlen, timenow))) return 0; @@ -421,11 +439,13 @@ infra_rtt_update(struct infra_cache* infra, if(roundtrip == -1) rtt_lost(&data->rtt); else rtt_update(&data->rtt, roundtrip); + if(data->rtt.rto > 0) + rto = data->rtt.rto; if(needtoinsert) slabhash_insert(infra->hosts, e->hash, e, e->data, NULL); else { lock_rw_unlock(&e->lock); } - return 1; + return rto; } int diff --git a/services/cache/infra.h b/services/cache/infra.h index ed2aa50e6..1252c717e 100644 --- a/services/cache/infra.h +++ b/services/cache/infra.h @@ -199,12 +199,21 @@ int infra_set_lame(struct infra_cache* infra, * @param roundtrip: estimate of roundtrip time in milliseconds or -1 for * timeout. * @param timenow: what time it is now. - * @return: 0 on error. + * @return: 0 on error. new rto otherwise. */ int infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr, socklen_t addrlen, int roundtrip, time_t timenow); +/** + * Update information for the host, store that a TCP transaction works. + * @param infra: infrastructure cache. + * @param addr: host address. + * @param addrlen: length of addr. + */ +void infra_update_tcp_works(struct infra_cache* infra, + struct sockaddr_storage* addr, socklen_t addrlen); + /** * Update edns information for the host. * @param infra: infrastructure cache. diff --git a/services/outside_network.c b/services/outside_network.c index cf1e347c3..457e20f8e 100644 --- a/services/outside_network.c +++ b/services/outside_network.c @@ -61,7 +61,7 @@ /** number of times to retry making a random ID that is unique. */ #define MAX_ID_RETRY 1000 /** number of retries on outgoing UDP queries */ -#define OUTBOUND_UDP_RETRY 4 +#define OUTBOUND_UDP_RETRY 1 /** initiate TCP transaction for serviced query */ static void serviced_tcp_initiate(struct outside_network* outnet, @@ -1021,6 +1021,9 @@ serviced_tcp_callback(struct comm_point* c, void* arg, int error, sq->pending = NULL; /* removed after this callback */ if(error != NETEVENT_NOERROR && verbosity >= VERB_DETAIL) log_addr("tcp error for address", &sq->addr, sq->addrlen); + if(error==NETEVENT_NOERROR) + infra_update_tcp_works(sq->outnet->infra, &sq->addr, + sq->addrlen); if(error==NETEVENT_NOERROR && LDNS_RCODE_WIRE(ldns_buffer_begin( c->buffer)) == LDNS_RCODE_FORMERR && sq->status == serviced_query_TCP_EDNS) { @@ -1066,6 +1069,7 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, struct serviced_query* sq = (struct serviced_query*)arg; struct outside_network* outnet = sq->outnet; struct timeval now; + int fallback_tcp = 0; if(gettimeofday(&now, NULL) < 0) { log_err("gettimeofday: %s", strerror(errno)); /* this option does not need current time */ @@ -1073,9 +1077,10 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, } sq->pending = NULL; /* removed after callback */ if(error == NETEVENT_TIMEOUT) { + int rto = 0; sq->retry++; - if(!infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen, - -1, (time_t)now.tv_sec)) + if(!(rto=infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen, + -1, (time_t)now.tv_sec))) log_err("out of memory in UDP exponential backoff"); if(sq->retry < OUTBOUND_UDP_RETRY) { log_name_addr(VERB_ALGO, "retry query", sq->qbuf+10, @@ -1085,8 +1090,13 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, } return 0; } - error = NETEVENT_TIMEOUT; - /* UDP does not work, fallback to TCP below */ + if(rto >= RTT_MAX_TIMEOUT) { + fallback_tcp = 1; + /* UDP does not work, fallback to TCP below */ + } else { + serviced_callbacks(sq, NETEVENT_TIMEOUT, c, rep); + return 0; + } } if(error == NETEVENT_NOERROR && sq->status == serviced_query_UDP_EDNS && LDNS_RCODE_WIRE(ldns_buffer_begin(c->buffer)) @@ -1103,8 +1113,8 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, } return 0; } - if(error != NETEVENT_NOERROR || - LDNS_TC_WIRE(ldns_buffer_begin(c->buffer))) { + if(LDNS_TC_WIRE(ldns_buffer_begin(c->buffer)) || + (error != NETEVENT_NOERROR && fallback_tcp) ) { /* fallback to TCP */ /* this discards partial UDP contents */ if(sq->status == serviced_query_UDP_EDNS)