git.ipfire.org Git - thirdparty/unbound.git/commitdiff
- Fix that auth zone fails over to next master for timeout in tcp.
author Wouter Wijngaards <wouter@nlnetlabs.nl>
Thu, 11 Apr 2019 13:41:53 +0000 (13:41 +0000)
committer Wouter Wijngaards <wouter@nlnetlabs.nl>
Thu, 11 Apr 2019 13:41:53 +0000 (13:41 +0000)
git-svn-id: file:///svn/unbound/trunk@5155 be551aaa-1e26-0410-a405-d3ace91eadb9

doc/Changelog
services/authzone.c
services/authzone.h
util/fptr_wlist.c
util/netevent.c

index edb574cb2cce0115f5ec6fe97c8573ee3bf4f83a..03d6139900670fa51d473deef18f4ddbac4dad0c 100644 (file)
@@ -2,6 +2,7 @@
        - Fix that auth zone uses correct network type for sockets for
          SOA serial probes.  This fixes that probes fail because earlier
          probe addresses are unreachable.
+       - Fix that auth zone fails over to next master for timeout in tcp.
 
 8 April 2019: Wouter
        - Fix to use event_assign with libevent for thread-safety.
index c3691bd6cdc0f38b81bdf56251818c35e834a324..e479e13c623870973a83fed06830a98c021e18e1 100644 (file)
@@ -2042,11 +2042,13 @@ auth_xfer_delete(struct auth_xfer* xfr)
        if(xfr->task_probe) {
                auth_free_masters(xfr->task_probe->masters);
                comm_point_delete(xfr->task_probe->cp);
+               comm_timer_delete(xfr->task_probe->timer);
                free(xfr->task_probe);
        }
        if(xfr->task_transfer) {
                auth_free_masters(xfr->task_transfer->masters);
                comm_point_delete(xfr->task_transfer->cp);
+               comm_timer_delete(xfr->task_transfer->timer);
                if(xfr->task_transfer->chunks_first) {
                        auth_chunks_delete(xfr->task_transfer);
                }
@@ -4973,6 +4975,9 @@ xfr_process_chunk_list(struct auth_xfer* xfr, struct module_env* env,
 static void
 xfr_transfer_disown(struct auth_xfer* xfr)
 {
+       /* remove timer (from this worker's event base) */
+       comm_timer_delete(xfr->task_transfer->timer);
+       xfr->task_transfer->timer = NULL;
        /* remove the commpoint */
        comm_point_delete(xfr->task_transfer->cp);
        xfr->task_transfer->cp = NULL;
@@ -5054,6 +5059,8 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env)
        struct sockaddr_storage addr;
        socklen_t addrlen = 0;
        struct auth_master* master = xfr->task_transfer->master;
+       struct timeval t;
+       int timeout;
        if(!master) return 0;
        if(master->allow_notify) return 0; /* only for notify */
 
@@ -5079,17 +5086,31 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env)
                comm_point_delete(xfr->task_transfer->cp);
                xfr->task_transfer->cp = NULL;
        }
+       if(!xfr->task_transfer->timer) {
+               xfr->task_transfer->timer = comm_timer_create(env->worker_base,
+                       auth_xfer_transfer_timer_callback, xfr);
+               if(!xfr->task_transfer->timer) {
+                       log_err("malloc failure");
+                       return 0;
+               }
+       }
+       timeout = AUTH_TRANSFER_TIMEOUT;
+#ifndef S_SPLINT_S
+        t.tv_sec = timeout/1000;
+        t.tv_usec = (timeout%1000)*1000;
+#endif
 
        if(master->http) {
                /* perform http fetch */
                /* store http port number into sockaddr,
                 * unless someone used unbound's host@port notation */
+               xfr->task_transfer->on_ixfr = 0;
                if(strchr(master->host, '@') == NULL)
                        sockaddr_store_port(&addr, addrlen, master->port);
                xfr->task_transfer->cp = outnet_comm_point_for_http(
                        env->outnet, auth_xfer_transfer_http_callback, xfr,
-                       &addr, addrlen, AUTH_TRANSFER_TIMEOUT, master->ssl,
-                       master->host, master->file);
+                       &addr, addrlen, -1, master->ssl, master->host,
+                       master->file);
                if(!xfr->task_transfer->cp) {
                        char zname[255+1], as[256];
                        dname_str(xfr->name, zname);
@@ -5098,6 +5119,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env)
                                "connection for %s to %s", zname, as);
                        return 0;
                }
+               comm_timer_set(xfr->task_transfer->timer, &t);
                if(verbosity >= VERB_ALGO) {
                        char zname[255+1], as[256];
                        dname_str(xfr->name, zname);
@@ -5117,7 +5139,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env)
        /* connect on fd */
        xfr->task_transfer->cp = outnet_comm_point_for_tcp(env->outnet,
                auth_xfer_transfer_tcp_callback, xfr, &addr, addrlen,
-               env->scratch_buffer, AUTH_TRANSFER_TIMEOUT);
+               env->scratch_buffer, -1);
        if(!xfr->task_transfer->cp) {
                char zname[255+1], as[256];
                dname_str(xfr->name, zname);
@@ -5126,6 +5148,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env)
                        "xfr %s to %s", zname, as);
                return 0;
        }
+       comm_timer_set(xfr->task_transfer->timer, &t);
        if(verbosity >= VERB_ALGO) {
                char zname[255+1], as[256];
                dname_str(xfr->name, zname);
@@ -5678,6 +5701,47 @@ process_list_end_transfer(struct auth_xfer* xfr, struct module_env* env)
        xfr_transfer_nexttarget_or_end(xfr, env);
 }
 
+/** task_transfer timer callback (arg is the struct auth_xfer): on timeout, drop the connection and hand over to xfr_transfer_nexttarget_or_end */
+void
+auth_xfer_transfer_timer_callback(void* arg)
+{
+       struct auth_xfer* xfr = (struct auth_xfer*)arg;
+       struct module_env* env;
+       int gonextonfail = 1; /* cleared below to skip xfr_transfer_nextmaster */
+       log_assert(xfr->task_transfer);
+       lock_basic_lock(&xfr->lock);
+       env = xfr->task_transfer->env;
+       if(env->outnet->want_to_quit) {
+               lock_basic_unlock(&xfr->lock);
+               return; /* stop on quit; the lock is released and nothing else is touched */
+       }
+
+       verbose(VERB_ALGO, "xfr stopped, connection timeout to %s",
+               xfr->task_transfer->master->host);
+
+       /* timeout while on IXFR: count it; at NUM_TIMEOUTS_FALLBACK_IXFR timeouts set ixfr_fail and do not advance the master */
+       if(xfr->task_transfer->on_ixfr) {
+               xfr->task_transfer->ixfr_possible_timeout_count++;
+               if(xfr->task_transfer->ixfr_possible_timeout_count >=
+                       NUM_TIMEOUTS_FALLBACK_IXFR) {
+                       verbose(VERB_ALGO, "xfr to %s, fallback "
+                               "from IXFR to AXFR (because of timeouts)",
+                               xfr->task_transfer->master->host);
+                       xfr->task_transfer->ixfr_fail = 1;
+                       gonextonfail = 0;
+               }
+       }
+
+       /* delete transferred data from list, and delete the commpoint, before moving on */
+       auth_chunks_delete(xfr->task_transfer);
+       comm_point_delete(xfr->task_transfer->cp);
+       xfr->task_transfer->cp = NULL;
+       if(gonextonfail)
+               xfr_transfer_nextmaster(xfr);
+       xfr_transfer_nexttarget_or_end(xfr, env); /* NOTE(review): xfr->lock is not unlocked here; presumably the callee releases it - confirm */
+       return;
+}
+
 /** callback for task_transfer tcp connections */
 int
 auth_xfer_transfer_tcp_callback(struct comm_point* c, void* arg, int err,
@@ -5694,6 +5758,8 @@ auth_xfer_transfer_tcp_callback(struct comm_point* c, void* arg, int err,
                lock_basic_unlock(&xfr->lock);
                return 0; /* stop on quit */
        }
+       /* stop the timer */
+       comm_timer_disable(xfr->task_transfer->timer);
 
        if(err != NETEVENT_NOERROR) {
                /* connection failed, closed, or timeout */
@@ -5774,6 +5840,8 @@ auth_xfer_transfer_http_callback(struct comm_point* c, void* arg, int err,
                return 0; /* stop on quit */
        }
        verbose(VERB_ALGO, "auth zone transfer http callback");
+       /* stop the timer */
+       comm_timer_disable(xfr->task_transfer->timer);
 
        if(err != NETEVENT_NOERROR && err != NETEVENT_DONE) {
                /* connection failed, closed, or timeout */
@@ -5973,13 +6041,12 @@ auth_xfer_probe_timer_callback(void* arg)
                return; /* stop on quit */
        }
 
+       if(verbosity >= VERB_ALGO) {
+               char zname[255+1];
+               dname_str(xfr->name, zname);
+               verbose(VERB_ALGO, "auth zone %s soa probe timeout", zname);
+       }
        if(xfr->task_probe->timeout <= AUTH_PROBE_TIMEOUT_STOP) {
-               if(verbosity >= VERB_ALGO) {
-                       char zname[255+1];
-                       dname_str(xfr->name, zname);
-                       verbose(VERB_ALGO, "auth zone %s soa probe timeout",
-                               zname);
-               }
                /* try again with bigger timeout */
                if(xfr_probe_send_probe(xfr, env, xfr->task_probe->timeout*2)) {
                        lock_basic_unlock(&xfr->lock);
index 376fe5144091f11ebcd37e426431f9ee4d176370..a695bd029b5623f8a397d0d05dc7fa715377d7b7 100644 (file)
@@ -400,6 +400,9 @@ struct auth_transfer {
        /** the transfer (TCP) to the master.
         * on the workers event base. */
        struct comm_point* cp;
+       /** timeout for the transfer.
+        * on the workers event base. */
+       struct comm_timer* timer;
 };
 
 /** list of addresses */
@@ -649,6 +652,8 @@ int auth_xfer_transfer_http_callback(struct comm_point* c, void* arg, int err,
         struct comm_reply* repinfo);
 /** xfer probe timeout callback, part of task_probe */
 void auth_xfer_probe_timer_callback(void* arg);
+/** xfer transfer timeout callback, part of task_transfer */
+void auth_xfer_transfer_timer_callback(void* arg);
 /** mesh callback for task_probe on lookup of host names */
 void auth_xfer_probe_lookup_callback(void* arg, int rcode,
        struct sldns_buffer* buf, enum sec_status sec, char* why_bogus,
index 02f85e8dc4ab756a1f1bc539323411e11ee5d23e..94d23fa3a32af37d65db8cb79abf27575f7871ee 100644 (file)
@@ -127,6 +127,7 @@ fptr_whitelist_comm_timer(void (*fptr)(void*))
 #endif
        else if(fptr == &auth_xfer_timer) return 1;
        else if(fptr == &auth_xfer_probe_timer_callback) return 1;
+       else if(fptr == &auth_xfer_transfer_timer_callback) return 1;
        return 0;
 }
 
index 261eb3dc8ca16e3337d4907a2929e82fef58ec4e..65ada7f7a6065f388c22082ef64f565375fd146c 100644 (file)
@@ -1746,6 +1746,16 @@ comm_point_tcp_handle_callback(int fd, short event, void* arg)
        }
 #endif
 
+       if(event&UB_EV_TIMEOUT) {
+               verbose(VERB_QUERY, "tcp took too long, dropped");
+               reclaim_tcp_handler(c);
+               if(!c->tcp_do_close) {
+                       fptr_ok(fptr_whitelist_comm_point(c->callback));
+                       (void)(*c->callback)(c, c->cb_arg,
+                               NETEVENT_TIMEOUT, NULL);
+               }
+               return;
+       }
        if(event&UB_EV_READ) {
                int has_tcpq = (c->tcp_req_info != NULL);
                if(!comm_point_tcp_handle_read(fd, c, 0)) {
@@ -1776,16 +1786,6 @@ comm_point_tcp_handle_callback(int fd, short event, void* arg)
                        tcp_req_info_read_again(fd, c);
                return;
        }
-       if(event&UB_EV_TIMEOUT) {
-               verbose(VERB_QUERY, "tcp took too long, dropped");
-               reclaim_tcp_handler(c);
-               if(!c->tcp_do_close) {
-                       fptr_ok(fptr_whitelist_comm_point(c->callback));
-                       (void)(*c->callback)(c, c->cb_arg,
-                               NETEVENT_TIMEOUT, NULL);
-               }
-               return;
-       }
        log_err("Ignored event %d for tcphdl.", event);
 }
 
@@ -2390,6 +2390,16 @@ comm_point_http_handle_callback(int fd, short event, void* arg)
        log_assert(c->type == comm_http);
        ub_comm_base_now(c->ev->base);
 
+       if(event&UB_EV_TIMEOUT) {
+               verbose(VERB_QUERY, "http took too long, dropped");
+               reclaim_http_handler(c);
+               if(!c->tcp_do_close) {
+                       fptr_ok(fptr_whitelist_comm_point(c->callback));
+                       (void)(*c->callback)(c, c->cb_arg,
+                               NETEVENT_TIMEOUT, NULL);
+               }
+               return;
+       }
        if(event&UB_EV_READ) {
                if(!comm_point_http_handle_read(fd, c)) {
                        reclaim_http_handler(c);
@@ -2414,16 +2424,6 @@ comm_point_http_handle_callback(int fd, short event, void* arg)
                }
                return;
        }
-       if(event&UB_EV_TIMEOUT) {
-               verbose(VERB_QUERY, "http took too long, dropped");
-               reclaim_http_handler(c);
-               if(!c->tcp_do_close) {
-                       fptr_ok(fptr_whitelist_comm_point(c->callback));
-                       (void)(*c->callback)(c, c->cb_arg,
-                               NETEVENT_TIMEOUT, NULL);
-               }
-               return;
-       }
        log_err("Ignored event %d for httphdl.", event);
 }
 
@@ -3146,8 +3146,8 @@ comm_point_stop_listening(struct comm_point* c)
 void 
 comm_point_start_listening(struct comm_point* c, int newfd, int msec)
 {
-       verbose(VERB_ALGO, "comm point start listening %d", 
-               c->fd==-1?newfd:c->fd);
+       verbose(VERB_ALGO, "comm point start listening %d (%d msec)", 
+               c->fd==-1?newfd:c->fd, msec);
        if(c->type == comm_tcp_accept && !c->tcp_free) {
                /* no use to start listening no free slots. */
                return;