From: Wouter Wijngaards Date: Thu, 11 Apr 2019 13:41:53 +0000 (+0000) Subject: - Fix that auth zone fails over to next master for timeout in tcp. X-Git-Tag: final-svn-state~20 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c6369e9ffa59eb5a9f714f57810ab6ed389866b7;p=thirdparty%2Funbound.git - Fix that auth zone fails over to next master for timeout in tcp. git-svn-id: file:///svn/unbound/trunk@5155 be551aaa-1e26-0410-a405-d3ace91eadb9 --- diff --git a/doc/Changelog b/doc/Changelog index edb574cb2..03d613990 100644 --- a/doc/Changelog +++ b/doc/Changelog @@ -2,6 +2,7 @@ - Fix that auth zone uses correct network type for sockets for SOA serial probes. This fixes that probes fail because earlier probe addresses are unreachable. + - Fix that auth zone fails over to next master for timeout in tcp. 8 April 2019: Wouter - Fix to use event_assign with libevent for thread-safety. diff --git a/services/authzone.c b/services/authzone.c index c3691bd6c..e479e13c6 100644 --- a/services/authzone.c +++ b/services/authzone.c @@ -2042,11 +2042,13 @@ auth_xfer_delete(struct auth_xfer* xfr) if(xfr->task_probe) { auth_free_masters(xfr->task_probe->masters); comm_point_delete(xfr->task_probe->cp); + comm_timer_delete(xfr->task_probe->timer); free(xfr->task_probe); } if(xfr->task_transfer) { auth_free_masters(xfr->task_transfer->masters); comm_point_delete(xfr->task_transfer->cp); + comm_timer_delete(xfr->task_transfer->timer); if(xfr->task_transfer->chunks_first) { auth_chunks_delete(xfr->task_transfer); } @@ -4973,6 +4975,9 @@ xfr_process_chunk_list(struct auth_xfer* xfr, struct module_env* env, static void xfr_transfer_disown(struct auth_xfer* xfr) { + /* remove timer (from this worker's event base) */ + comm_timer_delete(xfr->task_transfer->timer); + xfr->task_transfer->timer = NULL; /* remove the commpoint */ comm_point_delete(xfr->task_transfer->cp); xfr->task_transfer->cp = NULL; @@ -5054,6 +5059,8 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env) struct sockaddr_storage addr; socklen_t addrlen = 0; struct auth_master* master = xfr->task_transfer->master; + struct timeval t; + int timeout; if(!master) return 0; if(master->allow_notify) return 0; /* only for notify */ @@ -5079,17 +5086,31 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env) comm_point_delete(xfr->task_transfer->cp); xfr->task_transfer->cp = NULL; } + if(!xfr->task_transfer->timer) { + xfr->task_transfer->timer = comm_timer_create(env->worker_base, + auth_xfer_transfer_timer_callback, xfr); + if(!xfr->task_transfer->timer) { + log_err("malloc failure"); + return 0; + } + } + timeout = AUTH_TRANSFER_TIMEOUT; +#ifndef S_SPLINT_S + t.tv_sec = timeout/1000; + t.tv_usec = (timeout%1000)*1000; +#endif if(master->http) { /* perform http fetch */ /* store http port number into sockaddr, * unless someone used unbound's host@port notation */ + xfr->task_transfer->on_ixfr = 0; if(strchr(master->host, '@') == NULL) sockaddr_store_port(&addr, addrlen, master->port); xfr->task_transfer->cp = outnet_comm_point_for_http( env->outnet, auth_xfer_transfer_http_callback, xfr, - &addr, addrlen, AUTH_TRANSFER_TIMEOUT, master->ssl, - master->host, master->file); + &addr, addrlen, -1, master->ssl, master->host, + master->file); if(!xfr->task_transfer->cp) { char zname[255+1], as[256]; dname_str(xfr->name, zname); @@ -5098,6 +5119,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env) "connection for %s to %s", zname, as); return 0; } + comm_timer_set(xfr->task_transfer->timer, &t); if(verbosity >= VERB_ALGO) { char zname[255+1], as[256]; dname_str(xfr->name, zname); @@ -5117,7 +5139,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env) /* connect on fd */ xfr->task_transfer->cp = outnet_comm_point_for_tcp(env->outnet, auth_xfer_transfer_tcp_callback, xfr, &addr, addrlen, - env->scratch_buffer, AUTH_TRANSFER_TIMEOUT); + env->scratch_buffer, -1); if(!xfr->task_transfer->cp) { char zname[255+1], as[256]; dname_str(xfr->name, zname); @@ -5126,6 +5148,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env) "xfr %s to %s", zname, as); return 0; } + comm_timer_set(xfr->task_transfer->timer, &t); if(verbosity >= VERB_ALGO) { char zname[255+1], as[256]; dname_str(xfr->name, zname); @@ -5678,6 +5701,47 @@ process_list_end_transfer(struct auth_xfer* xfr, struct module_env* env) xfr_transfer_nexttarget_or_end(xfr, env); } +/** callback for the task_transfer timer */ +void +auth_xfer_transfer_timer_callback(void* arg) +{ + struct auth_xfer* xfr = (struct auth_xfer*)arg; + struct module_env* env; + int gonextonfail = 1; + log_assert(xfr->task_transfer); + lock_basic_lock(&xfr->lock); + env = xfr->task_transfer->env; + if(env->outnet->want_to_quit) { + lock_basic_unlock(&xfr->lock); + return; /* stop on quit */ + } + + verbose(VERB_ALGO, "xfr stopped, connection timeout to %s", + xfr->task_transfer->master->host); + + /* see if IXFR caused the failure, if so, try AXFR */ + if(xfr->task_transfer->on_ixfr) { + xfr->task_transfer->ixfr_possible_timeout_count++; + if(xfr->task_transfer->ixfr_possible_timeout_count >= + NUM_TIMEOUTS_FALLBACK_IXFR) { + verbose(VERB_ALGO, "xfr to %s, fallback " + "from IXFR to AXFR (because of timeouts)", + xfr->task_transfer->master->host); + xfr->task_transfer->ixfr_fail = 1; + gonextonfail = 0; + } + } + + /* delete transferred data from list */ + auth_chunks_delete(xfr->task_transfer); + comm_point_delete(xfr->task_transfer->cp); + xfr->task_transfer->cp = NULL; + if(gonextonfail) + xfr_transfer_nextmaster(xfr); + xfr_transfer_nexttarget_or_end(xfr, env); + return; +} + /** callback for task_transfer tcp connections */ int auth_xfer_transfer_tcp_callback(struct comm_point* c, void* arg, int err, @@ -5694,6 +5758,8 @@ auth_xfer_transfer_tcp_callback(struct comm_point* c, void* arg, int err, lock_basic_unlock(&xfr->lock); return 0; /* stop on quit */ } + /* stop the timer */ + comm_timer_disable(xfr->task_transfer->timer); if(err != NETEVENT_NOERROR) { /* connection failed, closed, or timeout */ @@ -5774,6 +5840,8 @@ auth_xfer_transfer_http_callback(struct comm_point* c, void* arg, int err, return 0; /* stop on quit */ } verbose(VERB_ALGO, "auth zone transfer http callback"); + /* stop the timer */ + comm_timer_disable(xfr->task_transfer->timer); if(err != NETEVENT_NOERROR && err != NETEVENT_DONE) { /* connection failed, closed, or timeout */ @@ -5973,13 +6041,12 @@ auth_xfer_probe_timer_callback(void* arg) return; /* stop on quit */ } + if(verbosity >= VERB_ALGO) { + char zname[255+1]; + dname_str(xfr->name, zname); + verbose(VERB_ALGO, "auth zone %s soa probe timeout", zname); + } if(xfr->task_probe->timeout <= AUTH_PROBE_TIMEOUT_STOP) { - if(verbosity >= VERB_ALGO) { - char zname[255+1]; - dname_str(xfr->name, zname); - verbose(VERB_ALGO, "auth zone %s soa probe timeout", - zname); - } /* try again with bigger timeout */ if(xfr_probe_send_probe(xfr, env, xfr->task_probe->timeout*2)) { lock_basic_unlock(&xfr->lock); diff --git a/services/authzone.h b/services/authzone.h index 376fe5144..a695bd029 100644 --- a/services/authzone.h +++ b/services/authzone.h @@ -400,6 +400,9 @@ struct auth_transfer { /** the transfer (TCP) to the master. * on the workers event base. */ struct comm_point* cp; + /** timeout for the transfer. + * on the workers event base. */ + struct comm_timer* timer; }; /** list of addresses */ @@ -649,6 +652,8 @@ int auth_xfer_transfer_http_callback(struct comm_point* c, void* arg, int err, struct comm_reply* repinfo); /** xfer probe timeout callback, part of task_probe */ void auth_xfer_probe_timer_callback(void* arg); +/** xfer transfer timeout callback, part of task_transfer */ +void auth_xfer_transfer_timer_callback(void* arg); /** mesh callback for task_probe on lookup of host names */ void auth_xfer_probe_lookup_callback(void* arg, int rcode, struct sldns_buffer* buf, enum sec_status sec, char* why_bogus, diff --git a/util/fptr_wlist.c b/util/fptr_wlist.c index 02f85e8dc..94d23fa3a 100644 --- a/util/fptr_wlist.c +++ b/util/fptr_wlist.c @@ -127,6 +127,7 @@ fptr_whitelist_comm_timer(void (*fptr)(void*)) #endif else if(fptr == &auth_xfer_timer) return 1; else if(fptr == &auth_xfer_probe_timer_callback) return 1; + else if(fptr == &auth_xfer_transfer_timer_callback) return 1; return 0; } diff --git a/util/netevent.c b/util/netevent.c index 261eb3dc8..65ada7f7a 100644 --- a/util/netevent.c +++ b/util/netevent.c @@ -1746,6 +1746,16 @@ comm_point_tcp_handle_callback(int fd, short event, void* arg) } #endif + if(event&UB_EV_TIMEOUT) { + verbose(VERB_QUERY, "tcp took too long, dropped"); + reclaim_tcp_handler(c); + if(!c->tcp_do_close) { + fptr_ok(fptr_whitelist_comm_point(c->callback)); + (void)(*c->callback)(c, c->cb_arg, + NETEVENT_TIMEOUT, NULL); + } + return; + } if(event&UB_EV_READ) { int has_tcpq = (c->tcp_req_info != NULL); if(!comm_point_tcp_handle_read(fd, c, 0)) { @@ -1776,16 +1786,6 @@ comm_point_tcp_handle_callback(int fd, short event, void* arg) tcp_req_info_read_again(fd, c); return; } - if(event&UB_EV_TIMEOUT) { - verbose(VERB_QUERY, "tcp took too long, dropped"); - reclaim_tcp_handler(c); - if(!c->tcp_do_close) { - fptr_ok(fptr_whitelist_comm_point(c->callback)); - (void)(*c->callback)(c, c->cb_arg, - NETEVENT_TIMEOUT, NULL); - } - return; - } log_err("Ignored event %d for tcphdl.", event); } @@ -2390,6 +2390,16 @@ comm_point_http_handle_callback(int fd, short event, void* arg) log_assert(c->type == comm_http); ub_comm_base_now(c->ev->base); + if(event&UB_EV_TIMEOUT) { + verbose(VERB_QUERY, "http took too long, dropped"); + reclaim_http_handler(c); + if(!c->tcp_do_close) { + fptr_ok(fptr_whitelist_comm_point(c->callback)); + (void)(*c->callback)(c, c->cb_arg, + NETEVENT_TIMEOUT, NULL); + } + return; + } if(event&UB_EV_READ) { if(!comm_point_http_handle_read(fd, c)) { reclaim_http_handler(c); @@ -2414,16 +2424,6 @@ comm_point_http_handle_callback(int fd, short event, void* arg) } return; } - if(event&UB_EV_TIMEOUT) { - verbose(VERB_QUERY, "http took too long, dropped"); - reclaim_http_handler(c); - if(!c->tcp_do_close) { - fptr_ok(fptr_whitelist_comm_point(c->callback)); - (void)(*c->callback)(c, c->cb_arg, - NETEVENT_TIMEOUT, NULL); - } - return; - } log_err("Ignored event %d for httphdl.", event); } @@ -3146,8 +3146,8 @@ comm_point_stop_listening(struct comm_point* c) void comm_point_start_listening(struct comm_point* c, int newfd, int msec) { - verbose(VERB_ALGO, "comm point start listening %d", - c->fd==-1?newfd:c->fd); + verbose(VERB_ALGO, "comm point start listening %d (%d msec)", + c->fd==-1?newfd:c->fd, msec); if(c->type == comm_tcp_accept && !c->tcp_free) { /* no use to start listening no free slots. */ return;