git.ipfire.org Git - thirdparty/unbound.git/commitdiff
- Fix bug where fallback_tcp causes wrong roundtrip and edns
author Wouter Wijngaards <wouter@nlnetlabs.nl>
Thu, 21 Oct 2010 15:11:39 +0000 (15:11 +0000)
committer Wouter Wijngaards <wouter@nlnetlabs.nl>
Thu, 21 Oct 2010 15:11:39 +0000 (15:11 +0000)
  observation to be noted in cache.  Fix bug where EDNSprobe halted
  exponential backoff if EDNS status unknown.
- new unresponsive host method, exponentially increasing block backoff.
- iana portlist updated.

git-svn-id: file:///svn/unbound/trunk@2303 be551aaa-1e26-0410-a405-d3ace91eadb9

daemon/cachedump.c
doc/Changelog
iterator/iter_utils.c
iterator/iterator.h
services/cache/infra.c
services/cache/infra.h
services/outside_network.c
util/iana_ports.inc
util/rtt.c
util/rtt.h

index ee88b11c2455ddfa35d29519763d05cad83bc1cc..5a420db60bc1f7d7dab8bca420a15a291e978ce1 100644 (file)
@@ -803,6 +803,7 @@ print_dp_details(SSL* ssl, struct worker* worker, struct delegpt* dp)
        char buf[257];
        struct delegpt_addr* a;
        int lame, dlame, rlame, rtt, edns_vs, to, lost;
+       int entry_ttl, clean_rtt, backoff;
        uint8_t edns_lame_known;
        for(a = dp->target_list; a; a = a->next_target) {
                addr_to_str(&a->addr, a->addrlen, buf, sizeof(buf));
@@ -813,6 +814,20 @@ print_dp_details(SSL* ssl, struct worker* worker, struct delegpt* dp)
                                return;
                }
                /* lookup in infra cache */
+               entry_ttl = infra_get_host_rto(worker->env.infra_cache,
+                       &a->addr, a->addrlen, &clean_rtt, &rtt, &backoff,
+                       *worker->env.now);
+               if(entry_ttl == -1) {
+                       if(!ssl_printf(ssl, "not in infra cache.\n"))
+                               return;
+                       continue; /* skip stuff not in infra cache */
+               } else if(entry_ttl == -2) {
+                       if(!ssl_printf(ssl, "not in infra cache "
+                               "(backoff %d).\n", backoff))
+                               return;
+                       continue; /* skip stuff not in infra cache */
+               }
+
                /* uses type_A because most often looked up, but other
                 * lameness won't be reported then */
                if(!infra_get_lame_rtt(worker->env.infra_cache, 
@@ -823,20 +838,28 @@ print_dp_details(SSL* ssl, struct worker* worker, struct delegpt* dp)
                                return;
                        continue; /* skip stuff not in infra cache */
                }
-               if(!ssl_printf(ssl, "%s%s%s%srtt %d msec, %d lost",
+               if(!ssl_printf(ssl, "%s%s%s%srtt %d msec, %d lost, ttl %d",
                        lame?"LAME ":"", dlame?"NoDNSSEC ":"",
                        a->lame?"AddrWasParentSide ":"",
-                       rlame?"NoAuthButRecursive ":"", rtt, lost))
+                       rlame?"NoAuthButRecursive ":"", rtt, lost, entry_ttl))
                        return;
+               if(rtt != clean_rtt && clean_rtt != 376 /* unknown */) {
+                       if(!ssl_printf(ssl, ", ping %d", clean_rtt))
+                               return;
+               }
+               if(backoff != INFRA_BACKOFF_INITIAL) {
+                       if(!ssl_printf(ssl, ", backoff %d", backoff))
+                               return;
+               }
                if(infra_host(worker->env.infra_cache, &a->addr, a->addrlen,
                        *worker->env.now, &edns_vs, &edns_lame_known, &to)) {
                        if(edns_vs == -1) {
-                               if(!ssl_printf(ssl, "noEDNS%s.",
-                                       edns_lame_known?" probed":""))
+                               if(!ssl_printf(ssl, ", noEDNS%s.",
+                                       edns_lame_known?" probed":" assumed"))
                                        return;
                        } else {
-                               if(!ssl_printf(ssl, "EDNS %d%s.",
-                                       edns_vs, edns_lame_known?" probed":""))
+                               if(!ssl_printf(ssl, ", EDNS %d%s.", edns_vs,
+                                       edns_lame_known?" probed":" assumed"))
                                        return;
                        }
                }
index 09c8bd0c4b69072fd72a0d704dede962f63559c0..98fad2e46d4f50f1c2f6901717c9214402cd28fd 100644 (file)
@@ -1,3 +1,10 @@
+21 October 2010: Wouter
+       - Fix bug where fallback_tcp causes wrong roundtrip and edns
+         observation to be noted in cache.  Fix bug where EDNSprobe halted
+         exponential backoff if EDNS status unknown.
+       - new unresponsive host method, exponentially increasing block backoff.
+       - iana portlist updated.
+
 20 October 2010: Wouter
        - interface automatic works for some people with ip6 disabled.
          Therefore the error check is removed, so they can use the option.
index ca1781a1bc6b10b3a4d0b0e650f9c953b38eb9e1..7bb1c12440c42af881156c08112462d732366020 100644 (file)
@@ -206,11 +206,9 @@ iter_filter_unsuitable(struct iter_env* iter_env, struct module_env* env,
                        a->lame?" ADDR_LAME":"");
                if(lame)
                        return -1; /* server is lame */
-               else if(rtt >= USEFUL_SERVER_TOP_TIMEOUT && 
-                       lost >= USEFUL_SERVER_MAX_LOST) {
+               else if(rtt >= USEFUL_SERVER_TOP_TIMEOUT)
                        /* server is unresponsive */
                        return USEFUL_SERVER_TOP_TIMEOUT;
-               }
                /* select remainder from worst to best */
                else if(reclame)
                        return rtt+USEFUL_SERVER_TOP_TIMEOUT*3; /* nonpref */
@@ -218,8 +216,6 @@ iter_filter_unsuitable(struct iter_env* iter_env, struct module_env* env,
                        return rtt+USEFUL_SERVER_TOP_TIMEOUT*2; /* nonpref */
                else if(a->lame)
                        return rtt+USEFUL_SERVER_TOP_TIMEOUT+1; /* nonpref */
-               else if(rtt >= USEFUL_SERVER_TOP_TIMEOUT) /* not blacklisted*/
-                       return USEFUL_SERVER_TOP_TIMEOUT+1;
                else    return rtt;
        }
        /* no server information present */
index a2d3eab479f76c708c3bea6df12624fd40e4932c..24fa6a3b5da0b250cb284f40692bb2739a114910 100644 (file)
@@ -75,6 +75,8 @@ struct iter_priv;
  * Chosen so that the UNKNOWN_SERVER_NICENESS falls within the band of a 
  * fast server, this causes server exploration as a side benefit. msec. */
 #define RTT_BAND 400
+/** Start value for blacklisting a host, 2*USEFUL_SERVER_TOP_TIMEOUT in sec */
+#define INFRA_BACKOFF_INITIAL 240
 
 /**
  * Global state for the iterator. 
index 2da7c399693a16fad164350aa15ccd34484dc44a..a2cd5996fbd501bd3656290d96638a71459a3f93 100644 (file)
@@ -190,7 +190,7 @@ infra_lookup_host(struct infra_cache* infra,
        return data;
 }
 
-/** init the host elements (not lame elems) */
+/** init the host elements (not lame elems, not backoff) */
 static void
 host_entry_init(struct infra_cache* infra, struct lruhash_entry* e, 
        uint32_t timenow)
@@ -233,6 +233,7 @@ new_host_entry(struct infra_cache* infra, struct sockaddr_storage* addr,
        key->addrlen = addrlen;
        memcpy(&key->addr, addr, addrlen);
        data->lameness = NULL;
+       data->backoff = INFRA_BACKOFF_INITIAL;
        host_entry_init(infra, &key->entry, tm);
        return &key->entry;
 }
@@ -270,14 +271,6 @@ infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
        /* use existing entry */
        data = (struct infra_host_data*)e->data;
        *to = rtt_timeout(&data->rtt);
-       if(*to >= USEFUL_SERVER_TOP_TIMEOUT &&
-               data->num_timeouts < USEFUL_SERVER_MAX_LOST)
-               /* use smaller timeout, backoff does not work
-                * The server seems to still reply but sporadically.
-                * Perhaps it has rate-limited the traffic, or it
-                * drops particular queries (AAAA).  ignore timeouts,
-                * and use the jostle timeout for rtt estimate. */
-               *to = (int)infra->jostle;
        *edns_vs = data->edns_version;
        *edns_lame_known = data->edns_lame_known;
        lock_rw_unlock(&e->lock);
@@ -491,11 +484,29 @@ infra_rtt_update(struct infra_cache* infra,
        /* have an entry, update the rtt */
        data = (struct infra_host_data*)e->data;
        if(roundtrip == -1) {
+               int o = rtt_timeout(&data->rtt);
                rtt_lost(&data->rtt, orig_rtt);
+               if(rtt_timeout(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT
+                       && o < USEFUL_SERVER_TOP_TIMEOUT) {
+                       /* backoff the blacklisted timeout */
+                       log_addr(VERB_ALGO, "backoff for", addr, addrlen);
+                       data->backoff *= 2;
+                       if(data->backoff >= 24*3600)
+                               data->backoff = 24*3600;
+                       verbose(VERB_ALGO, "backoff to %d", data->backoff);
+                       /* increase the infra item TTL */
+                       data->ttl = timenow + data->backoff;
+               }
+
                if(data->num_timeouts<255)
                        data->num_timeouts++; 
        } else {
                rtt_update(&data->rtt, roundtrip);
+               /* un-backoff the element */
+               if(data->backoff > (uint32_t)infra->host_ttl*2)
+                       data->backoff = (uint32_t)infra->host_ttl*2;
+               else    data->backoff = INFRA_BACKOFF_INITIAL;
+
                data->num_timeouts = 0;
        }
        if(data->rtt.rto > 0)
@@ -507,6 +518,26 @@ infra_rtt_update(struct infra_cache* infra,
        return rto;
 }
 
+int infra_get_host_rto(struct infra_cache* infra,
+        struct sockaddr_storage* addr, socklen_t addrlen,
+       int* rtt, int* rto, int* backoff, uint32_t timenow)
+{
+       struct lruhash_entry* e = infra_lookup_host_nottl(infra, addr, 
+               addrlen, 0);
+       struct infra_host_data* data;
+       int ttl = -2;
+       if(!e) return -1;
+       data = (struct infra_host_data*)e->data;
+       *backoff = (int)data->backoff;
+       if(data->ttl >= timenow) {
+               ttl = (int)(data->ttl - timenow);
+               *rtt = rtt_notimeout(&data->rtt);
+               *rto = rtt_unclamped(&data->rtt);
+       }
+       lock_rw_unlock(&e->lock);
+       return ttl;
+}
+
 int 
 infra_edns_update(struct infra_cache* infra,
         struct sockaddr_storage* addr, socklen_t addrlen,
index e70ed968fb0964e3163eb0ce8219698a7f32a33d..956edd23ffe6e8f541be1b8277d1ef7cd8add3b6 100644 (file)
@@ -64,6 +64,8 @@ struct infra_host_key {
 struct infra_host_data {
        /** TTL value for this entry. absolute time. */
        uint32_t ttl;
+       /** backoff time if blacklisted unresponsive. in seconds. */
+       uint32_t backoff;
        /** round trip times for timeout calculation */
        struct rtt_info rtt;
        /** Names of the zones that are lame. NULL=no lame zones. */
@@ -286,6 +288,22 @@ int infra_get_lame_rtt(struct infra_cache* infra,
        int* lame, int* dnsseclame, int* reclame, int* rtt, int* lost,
        uint32_t timenow);
 
+/**
+ * Get additional (debug) info on timing.
+ * @param infra: infra cache.
+ * @param addr: host address.
+ * @param addrlen: length of addr.
+ * @param rtt: the clean rtt time (of working replies).
+ * @param rto: the rtt with timeouts applied. (rtt as returned by other funcs).
+ * @param backoff: the backoff time (in seconds) for blacklisted entries.
+ * @param timenow: what time it is now.
+ * @return TTL the infra host element is valid for. If -1: not found in cache.
+ *     If -2: found in cache, but TTL was not valid, only backoff is filled.
+ */
+int infra_get_host_rto(struct infra_cache* infra,
+        struct sockaddr_storage* addr, socklen_t addrlen, 
+       int* rtt, int* rto, int* backoff, uint32_t timenow);
+
 /**
  * Get memory used by the infra cache.
  * @param infra: infrastructure cache.
index 151abf1d2d94a55f1ea1bc1e573d1f9cbd4aebd0..bddbb560872335124006926122f05c5dc830199b 100644 (file)
@@ -1305,6 +1305,7 @@ serviced_udp_send(struct serviced_query* sq, ldns_buffer* buff)
        if(!infra_host(sq->outnet->infra, &sq->addr, sq->addrlen, now, &vs,
                &edns_lame_known, &rtt))
                return 0;
+       sq->last_rtt = rtt;
        if(sq->status == serviced_initial) {
                if(edns_lame_known == 0 && rtt > 5000 && rtt < 10001) {
                        /* perform EDNS lame probe - check if server is
@@ -1321,7 +1322,6 @@ serviced_udp_send(struct serviced_query* sq, ldns_buffer* buff)
        }
        serviced_encode(sq, buff, sq->status == serviced_query_UDP_EDNS);
        sq->last_sent_time = *sq->outnet->now_tv;
-       sq->last_rtt = rtt;
        sq->edns_lame_known = (int)edns_lame_known;
        verbose(VERB_ALGO, "serviced query UDP timeout=%d msec", rtt);
        sq->pending = pending_udp_query(sq->outnet, buff, &sq->addr, 
@@ -1584,7 +1584,8 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
                serviced_callbacks(sq, error, c, rep);
                return 0;
        }
-       if(sq->status == serviced_query_UDP_EDNS 
+       if(!fallback_tcp) {
+           if(sq->status == serviced_query_UDP_EDNS 
                && (LDNS_RCODE_WIRE(ldns_buffer_begin(c->buffer)) 
                        == LDNS_RCODE_FORMERR || LDNS_RCODE_WIRE(
                        ldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOTIMPL)) {
@@ -1595,7 +1596,7 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
                        serviced_callbacks(sq, NETEVENT_CLOSED, c, rep);
                }
                return 0;
-       } else if(sq->status == serviced_query_PROBE_EDNS) {
+           } else if(sq->status == serviced_query_PROBE_EDNS) {
                /* probe without EDNS succeeds, so we conclude that this
                 * host likely has EDNS packets dropped */
                log_addr(VERB_DETAIL, "timeouts, concluded that connection to "
@@ -1607,7 +1608,7 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
                        log_err("Out of memory caching no edns for host");
                  }
                sq->status = serviced_query_UDP;
-       } else if(sq->status == serviced_query_UDP_EDNS && 
+           } else if(sq->status == serviced_query_UDP_EDNS && 
                !sq->edns_lame_known) {
                /* now we know that edns queries received answers store that */
                if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen, 
@@ -1615,7 +1616,7 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
                        log_err("Out of memory caching edns works");
                }
                sq->edns_lame_known = 1;
-       } else if(sq->status == serviced_query_UDP_EDNS_fallback &&
+           } else if(sq->status == serviced_query_UDP_EDNS_fallback &&
                !sq->edns_lame_known && (LDNS_RCODE_WIRE(
                ldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOERROR || 
                LDNS_RCODE_WIRE(ldns_buffer_begin(c->buffer)) == 
@@ -1630,8 +1631,8 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
                        log_err("Out of memory caching no edns for host");
                  }
                sq->status = serviced_query_UDP;
-       }
-       if(now.tv_sec > sq->last_sent_time.tv_sec ||
+           }
+           if(now.tv_sec > sq->last_sent_time.tv_sec ||
                (now.tv_sec == sq->last_sent_time.tv_sec &&
                now.tv_usec > sq->last_sent_time.tv_usec)) {
                /* convert from microseconds to milliseconds */
@@ -1642,7 +1643,8 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
                if(!infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen, 
                        roundtime, sq->last_rtt, (uint32_t)now.tv_sec))
                        log_err("out of memory noting rtt.");
-       }
+           }
+       } /* end of if_!fallback_tcp */
        /* perform TC flag check and TCP fallback after updating our
         * cache entries for EDNS status and RTT times */
        if(LDNS_TC_WIRE(ldns_buffer_begin(c->buffer)) || fallback_tcp) {
index 07956a67db1df2c00f2fbe7795e025ab169b0ebc..20b0242d9633554003b394ce6485f7c1fd043dfc 100644 (file)
 5100,
 5101,
 5102,
+5104,
+5105,
 5111,
 5112,
 5113,
index efd7925a5775a39f3f4a5326a6b431b4d01fd7e2..af21cc39f8e47d76ee4b362b4ec798f08878204b 100644 (file)
@@ -109,3 +109,8 @@ rtt_lost(struct rtt_info* rtt, int orig)
                        rtt->rto = RTT_MAX_TIMEOUT;
        }
 }
+
+int rtt_notimeout(const struct rtt_info* rtt)
+{
+       return calc_rto(rtt);
+}
index 9dc6c04e4e014d07ef92c9df895b4128a2969b31..1af5484c9832aa21986acc397baf1c22fc6aca33 100644 (file)
@@ -81,6 +81,13 @@ int rtt_timeout(const struct rtt_info* rtt);
  */
 int rtt_unclamped(const struct rtt_info* rtt);
 
+/**
+ * RTT for valid responses. Without timeouts.
+ * @param rtt: round trip statistics structure.
+ * @return: value in msec.
+ */
+int rtt_notimeout(const struct rtt_info* rtt);
+
 /**
  * Update the statistics with a new roundtrip estimate observation.
  * @param rtt: round trip statistics structure.