git.ipfire.org Git - thirdparty/knot-resolver.git/commitdiff
daemon, resolve, nsrep: improve transport failures handling when forwarding
author Grigorii Demidov <grigorii.demidov@nic.cz>
Mon, 3 Dec 2018 12:18:01 +0000 (13:18 +0100)
committer Grigorii Demidov <grigorii.demidov@nic.cz>
Fri, 7 Dec 2018 10:23:42 +0000 (11:23 +0100)
daemon/worker.c
lib/nsrep.c
lib/nsrep.h
lib/resolve.c

index 81856fa93cdfc66192aa0ff713cdd792ffd94d6d..e8e70a76a2ff3c4d0c62fd55256d4e95429c9884 100644 (file)
@@ -688,7 +688,7 @@ static int session_tls_hs_cb(struct session *session, int status)
        if (status) {
                kr_nsrep_update_rtt(NULL, peer, KR_NS_DEAD,
                                    worker->engine->resolver.cache_rtt,
-                                   KR_NS_UPDATE_NORESET);
+                                   KR_NS_RESET);
                return ret;
        }
 
@@ -812,10 +812,13 @@ static void on_connect(uv_connect_t *req, int status)
        if (status != 0) {
                if (kr_verbose_status) {
                        const char *peer_str = kr_straddr(peer);
-                       kr_log_verbose( "[wrkr]=> connect to '%s' failed (%s)\n",
+                       kr_log_verbose( "[wrkr]=> connect to '%s' failed (%s), flagged as 'bad'\n",
                                        peer_str ? peer_str : "", uv_strerror(status));
                }
                worker_del_tcp_waiting(worker, peer);
+               kr_nsrep_update_rtt(NULL, peer, KR_NS_DEAD,
+                                   worker->engine->resolver.cache_rtt,
+                                   KR_NS_RESET);
                assert(session_tasklist_is_empty(session));
                session_waitinglist_retry(session, false);
                session_close(session);
@@ -890,7 +893,7 @@ static void on_tcp_connect_timeout(uv_timer_t *timer)
 
        kr_nsrep_update_rtt(NULL, peer, KR_NS_DEAD,
                            worker->engine->resolver.cache_rtt,
-                           KR_NS_UPDATE_NORESET);
+                           KR_NS_RESET);
 
        worker->stats.timeout += session_waitinglist_get_len(session);
        session_waitinglist_retry(session, true);
@@ -917,13 +920,12 @@ static void on_udp_timeout(uv_timer_t *timer)
                for (uint16_t i = 0; i < MIN(task->pending_count, task->addrlist_count); ++i) {
                        struct sockaddr *choice = (struct sockaddr *)(&addrlist[i]);
                        WITH_VERBOSE(qry) {
-                               char addr_str[INET6_ADDRSTRLEN];
-                               inet_ntop(choice->sa_family, kr_inaddr(choice), addr_str, sizeof(addr_str));
-                               VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", addr_str);
+                               char *addr_str = kr_straddr(choice);
+                               VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", addr_str ? addr_str : "");
                        }
                        kr_nsrep_update_rtt(&qry->ns, choice, KR_NS_DEAD,
                                            worker->engine->resolver.cache_rtt,
-                                           KR_NS_UPDATE_NORESET);
+                                           KR_NS_RESET);
                }
        }
        task->timeouts += 1;
@@ -1278,11 +1280,20 @@ static int tcp_task_make_connection(struct qr_task *task, const struct sockaddr
        }
 
        /*  Start connection process to upstream. */
-       if (uv_tcp_connect(conn, (uv_tcp_t *)client, addr , on_connect) != 0) {
+       ret = uv_tcp_connect(conn, (uv_tcp_t *)client, addr , on_connect);
+       if (ret != 0) {
                session_timer_stop(session);
                worker_del_tcp_waiting(ctx->worker, addr);
                free(conn);
                session_close(session);
+               kr_nsrep_update_rtt(NULL, addr, KR_NS_DEAD,
+                                   worker->engine->resolver.cache_rtt,
+                                   KR_NS_RESET);
+               WITH_VERBOSE (qry) {
+                       const char *peer_str = kr_straddr(peer);
+                       kr_log_verbose( "[wrkr]=> connect to '%s' failed (%s), flagged as 'bad'\n",
+                                       peer_str ? peer_str : "", uv_strerror(ret));
+               }
                return kr_error(EAGAIN);
        }
 
index 47f6ac9ae58315c5716158288baafde03f2efa25..f081f77eb2d39243b69aecaf4d384c655732ca97 100644 (file)
@@ -508,7 +508,7 @@ int kr_nsrep_sort(struct kr_nsrep *ns, struct kr_context *ctx)
                        if (sa->sa_family == AF_INET) {
                                scores[i] += FAVOUR_IPV6;
                        }
-               } else if (rtt_cache_entry->score >= KR_NS_TIMEOUT) {
+               } else if (rtt_cache_entry->score >= KR_NS_FWD_TIMEOUT) {
                        uint64_t now = kr_now();
                        uint64_t elapsed = now - rtt_cache_entry->tout_timestamp;
                        scores[i] = KR_NS_MAX_SCORE + 1;
index 0318de844c82cb79c0eda6e24b07ea3a79f3a58a..8db0ce062b398820674a85a551acbd07e98cbe52 100644 (file)
@@ -31,19 +31,18 @@ struct kr_query;
   * @note RTT is measured in milliseconds.
   */
 enum kr_ns_score {
-       KR_NS_MAX_SCORE = KR_CONN_RTT_MAX,
-       KR_NS_TIMEOUT   = (95 * KR_NS_MAX_SCORE) / 100,
-       KR_NS_LONG      = (3 * KR_NS_TIMEOUT) / 4,
-       KR_NS_UNKNOWN   = KR_NS_TIMEOUT / 2,
-       KR_NS_PENALTY   = 100,
-       KR_NS_GLUED     = 10,
+       KR_NS_MAX_SCORE     = 20 * KR_CONN_RTT_MAX, /* rtt "invalid value" */
+       KR_NS_DEAD          = KR_NS_MAX_SCORE - 1,  /* NS didn't answer via UDP transport,
+                                                    * TCP connection failed or
+                                                    * TLS handshake failed */
+       KR_NS_FWD_TIMEOUT   = 10000, /* timeout for upstream recursor  */
+       KR_NS_TIMEOUT       = (95 * KR_CONN_RTT_MAX) / 100, /* timeout for upstream auth */
+       KR_NS_LONG          = (3 * KR_NS_TIMEOUT) / 4,
+       KR_NS_UNKNOWN       = KR_NS_TIMEOUT / 2,
+       KR_NS_PENALTY       = 100,
+       KR_NS_GLUED         = 10
 };
 
-/**
- *  See kr_nsrep_update_rtt()
- */
-#define KR_NS_DEAD (((KR_NS_TIMEOUT * 4) + 3) / 3)
-
 /** If once NS was marked as "timeouted", it won't participate in NS elections
  * at least KR_NS_TIMEOUT_RETRY_INTERVAL milliseconds (now: one minute). */
 #define KR_NS_TIMEOUT_RETRY_INTERVAL 60000
@@ -144,8 +143,6 @@ int kr_nsrep_elect_addr(struct kr_query *qry, struct kr_context *ctx);
  * @param  ns           updated NS representation
  * @param  addr         chosen address (NULL for first)
  * @param  score        new score (i.e. RTT), see enum kr_ns_score
- *                      after two calls with score = KR_NS_DEAD and umode = KR_NS_UPDATE
- *                      server will be guaranteed to have score >= KR_NS_TIMEOUT
  * @param  cache        RTT LRU cache
  * @param  umode        update mode (KR_NS_UPDATE or KR_NS_RESET or KR_NS_ADD)
  * @return              0 on success, error code on failure
index 22d64fdeec52215b1d12fd4568d4ac1799527cac..27d10373b1b47c4b066729ed4188ab18a2e4b0bb 100644 (file)
@@ -878,12 +878,6 @@ static void update_nslist_score(struct kr_request *request, struct kr_query *qry
                } else { /* Penalize SERVFAILs. */
                        kr_nsrep_update_rtt(&qry->ns, src, KR_NS_PENALTY, ctx->cache_rtt, KR_NS_ADD);
                }
-       /* Penalise resolution failures except validation failures. */
-       } else if (!(qry->flags.DNSSEC_BOGUS)) {
-               kr_nsrep_update_rtt(&qry->ns, src, KR_NS_TIMEOUT, ctx->cache_rtt, KR_NS_UPDATE);
-               WITH_VERBOSE(qry) {
-                       VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", kr_straddr(src));
-               }
        }
 }
 
@@ -1428,6 +1422,8 @@ ns_election:
        } else if (qflg.FORWARD || qflg.STUB) {
                kr_nsrep_sort(&qry->ns, request->ctx);
                if (qry->ns.score > KR_NS_MAX_SCORE) {
+                       /* At the moment all NS have bad reputation.
+                        * But there can be existing connections*/
                        VERBOSE_MSG(qry, "=> no valid NS left\n");
                        return KR_STATE_FAIL;
                }
@@ -1468,14 +1464,14 @@ ns_election:
                return KR_STATE_PRODUCE;
        }
 
-       /* Randomize query case (if not in safemode or turned off) */
+       /* Randomize query case (if not in safe mode or turned off) */
        qry->secret = (qry->flags.SAFEMODE || qry->flags.NO_0X20)
                        ? 0 : kr_rand_uint(0);
        knot_dname_t *qname_raw = knot_pkt_qname(packet);
        randomized_qname_case(qname_raw, qry->secret);
 
        /*
-        * Additional query is going to be finalised when calling
+        * Additional query is going to be finalized when calling
         * kr_resolve_checkout().
         */
        qry->timestamp_mono = kr_now();
@@ -1581,8 +1577,6 @@ int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src,
 
        WITH_VERBOSE(qry) {
 
-       char ns_str[INET6_ADDRSTRLEN];
-
        KR_DNAME_GET_STR(qname_str, knot_pkt_qname(packet));
        KR_DNAME_GET_STR(zonecut_str, qry->zone_cut.name);
        KR_RRTYPE_GET_STR(type_str, knot_pkt_qtype(packet));
@@ -1595,12 +1589,13 @@ int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src,
                if (!kr_inaddr_equal(dst, addr)) {
                        continue;
                }
-               inet_ntop(addr->sa_family, kr_inaddr(&qry->ns.addr[i].ip), ns_str, sizeof(ns_str));
+               const char *ns_str = kr_straddr(addr);
                VERBOSE_MSG(qry,
                        "=> id: '%05u' querying: '%s' score: %u zone cut: '%s' "
                        "qname: '%s' qtype: '%s' proto: '%s'\n",
-                       qry->id, ns_str, qry->ns.score, zonecut_str,
+                       qry->id, ns_str ? ns_str : "", qry->ns.score, zonecut_str,
                        qname_str, type_str, (qry->flags.TCP) ? "tcp" : "udp");
+
                break;
        }}