From: Grigorii Demidov Date: Mon, 3 Dec 2018 12:18:01 +0000 (+0100) Subject: daemon, resolve, nsrep: improve transport failures handling when forwarding X-Git-Tag: v3.2.0~15^2~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=63c8df64548f855608fa4eac0a6e262268fb56fc;p=thirdparty%2Fknot-resolver.git daemon, resolve, nsrep: improve transport failures handling when forwarding --- diff --git a/daemon/worker.c b/daemon/worker.c index 81856fa93..e8e70a76a 100644 --- a/daemon/worker.c +++ b/daemon/worker.c @@ -688,7 +688,7 @@ static int session_tls_hs_cb(struct session *session, int status) if (status) { kr_nsrep_update_rtt(NULL, peer, KR_NS_DEAD, worker->engine->resolver.cache_rtt, - KR_NS_UPDATE_NORESET); + KR_NS_RESET); return ret; } @@ -812,10 +812,13 @@ static void on_connect(uv_connect_t *req, int status) if (status != 0) { if (kr_verbose_status) { const char *peer_str = kr_straddr(peer); - kr_log_verbose( "[wrkr]=> connect to '%s' failed (%s)\n", + kr_log_verbose( "[wrkr]=> connect to '%s' failed (%s), flagged as 'bad'\n", peer_str ? peer_str : "", uv_strerror(status)); } worker_del_tcp_waiting(worker, peer); + kr_nsrep_update_rtt(NULL, peer, KR_NS_DEAD, + worker->engine->resolver.cache_rtt, + KR_NS_RESET); assert(session_tasklist_is_empty(session)); session_waitinglist_retry(session, false); session_close(session); @@ -890,7 +893,7 @@ static void on_tcp_connect_timeout(uv_timer_t *timer) kr_nsrep_update_rtt(NULL, peer, KR_NS_DEAD, worker->engine->resolver.cache_rtt, - KR_NS_UPDATE_NORESET); + KR_NS_RESET); worker->stats.timeout += session_waitinglist_get_len(session); session_waitinglist_retry(session, true); @@ -917,13 +920,12 @@ static void on_udp_timeout(uv_timer_t *timer) for (uint16_t i = 0; i < MIN(task->pending_count, task->addrlist_count); ++i) { struct sockaddr *choice = (struct sockaddr *)(&addrlist[i]); WITH_VERBOSE(qry) { - char addr_str[INET6_ADDRSTRLEN]; - inet_ntop(choice->sa_family, kr_inaddr(choice), addr_str, sizeof(addr_str)); - VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", addr_str); + char *addr_str = kr_straddr(choice); + VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", addr_str ? addr_str : ""); } kr_nsrep_update_rtt(&qry->ns, choice, KR_NS_DEAD, worker->engine->resolver.cache_rtt, - KR_NS_UPDATE_NORESET); + KR_NS_RESET); } } task->timeouts += 1; @@ -1278,11 +1280,20 @@ static int tcp_task_make_connection(struct qr_task *task, const struct sockaddr } /* Start connection process to upstream. */ - if (uv_tcp_connect(conn, (uv_tcp_t *)client, addr , on_connect) != 0) { + ret = uv_tcp_connect(conn, (uv_tcp_t *)client, addr , on_connect); + if (ret != 0) { session_timer_stop(session); worker_del_tcp_waiting(ctx->worker, addr); free(conn); session_close(session); + kr_nsrep_update_rtt(NULL, addr, KR_NS_DEAD, + worker->engine->resolver.cache_rtt, + KR_NS_RESET); + WITH_VERBOSE (qry) { + const char *peer_str = kr_straddr(peer); + kr_log_verbose( "[wrkr]=> connect to '%s' failed (%s), flagged as 'bad'\n", + peer_str ? peer_str : "", uv_strerror(ret)); + } return kr_error(EAGAIN); } diff --git a/lib/nsrep.c b/lib/nsrep.c index 47f6ac9ae..f081f77eb 100644 --- a/lib/nsrep.c +++ b/lib/nsrep.c @@ -508,7 +508,7 @@ int kr_nsrep_sort(struct kr_nsrep *ns, struct kr_context *ctx) if (sa->sa_family == AF_INET) { scores[i] += FAVOUR_IPV6; } - } else if (rtt_cache_entry->score >= KR_NS_TIMEOUT) { + } else if (rtt_cache_entry->score >= KR_NS_FWD_TIMEOUT) { uint64_t now = kr_now(); uint64_t elapsed = now - rtt_cache_entry->tout_timestamp; scores[i] = KR_NS_MAX_SCORE + 1; diff --git a/lib/nsrep.h b/lib/nsrep.h index 0318de844..8db0ce062 100644 --- a/lib/nsrep.h +++ b/lib/nsrep.h @@ -31,19 +31,18 @@ struct kr_query; * @note RTT is measured in milliseconds. */ enum kr_ns_score { - KR_NS_MAX_SCORE = KR_CONN_RTT_MAX, - KR_NS_TIMEOUT = (95 * KR_NS_MAX_SCORE) / 100, - KR_NS_LONG = (3 * KR_NS_TIMEOUT) / 4, - KR_NS_UNKNOWN = KR_NS_TIMEOUT / 2, - KR_NS_PENALTY = 100, - KR_NS_GLUED = 10, + KR_NS_MAX_SCORE = 20 * KR_CONN_RTT_MAX, /* rtt "invalid value" */ + KR_NS_DEAD = KR_NS_MAX_SCORE - 1, /* NS didn't answer via UDP transport, + * TCP connection failed or + * TLS handshake failed */ + KR_NS_FWD_TIMEOUT = 10000, /* timeout for upstream recursor */ + KR_NS_TIMEOUT = (95 * KR_CONN_RTT_MAX) / 100, /* timeout for upstream auth */ + KR_NS_LONG = (3 * KR_NS_TIMEOUT) / 4, + KR_NS_UNKNOWN = KR_NS_TIMEOUT / 2, + KR_NS_PENALTY = 100, + KR_NS_GLUED = 10 }; -/** - * See kr_nsrep_update_rtt() - */ -#define KR_NS_DEAD (((KR_NS_TIMEOUT * 4) + 3) / 3) - /** If once NS was marked as "timeouted", it won't participate in NS elections * at least KR_NS_TIMEOUT_RETRY_INTERVAL milliseconds (now: one minute). */ #define KR_NS_TIMEOUT_RETRY_INTERVAL 60000 @@ -144,8 +143,6 @@ int kr_nsrep_elect_addr(struct kr_query *qry, struct kr_context *ctx); * @param ns updated NS representation * @param addr chosen address (NULL for first) * @param score new score (i.e. RTT), see enum kr_ns_score - * after two calls with score = KR_NS_DEAD and umode = KR_NS_UPDATE - * server will be guaranteed to have score >= KR_NS_TIMEOUT * @param cache RTT LRU cache * @param umode update mode (KR_NS_UPDATE or KR_NS_RESET or KR_NS_ADD) * @return 0 on success, error code on failure diff --git a/lib/resolve.c b/lib/resolve.c index 22d64fdee..27d10373b 100644 --- a/lib/resolve.c +++ b/lib/resolve.c @@ -878,12 +878,6 @@ static void update_nslist_score(struct kr_request *request, struct kr_query *qry } else { /* Penalize SERVFAILs. */ kr_nsrep_update_rtt(&qry->ns, src, KR_NS_PENALTY, ctx->cache_rtt, KR_NS_ADD); } - /* Penalise resolution failures except validation failures. */ - } else if (!(qry->flags.DNSSEC_BOGUS)) { - kr_nsrep_update_rtt(&qry->ns, src, KR_NS_TIMEOUT, ctx->cache_rtt, KR_NS_UPDATE); - WITH_VERBOSE(qry) { - VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", kr_straddr(src)); - } } } @@ -1428,6 +1422,8 @@ ns_election: } else if (qflg.FORWARD || qflg.STUB) { kr_nsrep_sort(&qry->ns, request->ctx); if (qry->ns.score > KR_NS_MAX_SCORE) { + /* At the moment all NS have bad reputation. + * But there can be existing connections*/ VERBOSE_MSG(qry, "=> no valid NS left\n"); return KR_STATE_FAIL; } @@ -1468,14 +1464,14 @@ ns_election: return KR_STATE_PRODUCE; } - /* Randomize query case (if not in safemode or turned off) */ + /* Randomize query case (if not in safe mode or turned off) */ qry->secret = (qry->flags.SAFEMODE || qry->flags.NO_0X20) ? 0 : kr_rand_uint(0); knot_dname_t *qname_raw = knot_pkt_qname(packet); randomized_qname_case(qname_raw, qry->secret); /* - * Additional query is going to be finalised when calling + * Additional query is going to be finalized when calling * kr_resolve_checkout(). */ qry->timestamp_mono = kr_now(); @@ -1581,8 +1577,6 @@ int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src, WITH_VERBOSE(qry) { - char ns_str[INET6_ADDRSTRLEN]; - KR_DNAME_GET_STR(qname_str, knot_pkt_qname(packet)); KR_DNAME_GET_STR(zonecut_str, qry->zone_cut.name); KR_RRTYPE_GET_STR(type_str, knot_pkt_qtype(packet)); @@ -1595,12 +1589,13 @@ int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src, if (!kr_inaddr_equal(dst, addr)) { continue; } - inet_ntop(addr->sa_family, kr_inaddr(&qry->ns.addr[i].ip), ns_str, sizeof(ns_str)); + const char *ns_str = kr_straddr(addr); VERBOSE_MSG(qry, "=> id: '%05u' querying: '%s' score: %u zone cut: '%s' " "qname: '%s' qtype: '%s' proto: '%s'\n", - qry->id, ns_str, qry->ns.score, zonecut_str, + qry->id, ns_str ? ns_str : "", qry->ns.score, zonecut_str, qname_str, type_str, (qry->flags.TCP) ? "tcp" : "udp"); + break; }}