From 72452c8e1cf4321fa8a8bf9da950919866871df0 Mon Sep 17 00:00:00 2001 From: Grigorii Demidov Date: Wed, 7 Mar 2018 17:21:07 +0100 Subject: [PATCH] lib/nsrep: some changes in NS selection algorithm --- daemon/lua/kres-gen.lua | 1 + daemon/worker.c | 2 +- lib/nsrep.c | 4 ++-- lib/resolve.c | 10 +++++++--- lib/rplan.h | 1 + modules/serve_stale/serve_stale.lua | 4 ++-- 6 files changed, 14 insertions(+), 8 deletions(-) diff --git a/daemon/lua/kres-gen.lua b/daemon/lua/kres-gen.lua index a0210962e..c2d1bce17 100644 --- a/daemon/lua/kres-gen.lua +++ b/daemon/lua/kres-gen.lua @@ -96,6 +96,7 @@ struct kr_qflags { _Bool FORWARD : 1; _Bool DNS64_MARK : 1; _Bool CACHE_TRIED : 1; + _Bool NO_NS_FOUND : 1; }; typedef struct { knot_rrset_t **at; diff --git a/daemon/worker.c b/daemon/worker.c index 62b87ee26..95ce4078b 100644 --- a/daemon/worker.c +++ b/daemon/worker.c @@ -1329,7 +1329,7 @@ static void on_udp_timeout(uv_timer_t *timer) VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", addr_str); } kr_nsrep_update_rtt(&qry->ns, choice, KR_NS_TIMEOUT, - worker->engine->resolver.cache_rtt, KR_NS_UPDATE); + worker->engine->resolver.cache_rtt, KR_NS_RESET); } } task->timeouts += 1; diff --git a/lib/nsrep.c b/lib/nsrep.c index b25453b49..30101a3db 100644 --- a/lib/nsrep.c +++ b/lib/nsrep.c @@ -162,10 +162,10 @@ static int eval_nsrep(const char *k, void *v, void *baton) * The fastest NS is preferred by workers until it is depleted (timeouts or degrades), * at the same time long distance scouts probe other sources (low probability). 
* Servers on TIMEOUT (depleted) can be probed by the dice roll only */ - if (score <= ns->score && (qry->flags.NO_THROTTLE || score < KR_NS_TIMEOUT)) { + if (score <= ns->score && (score < KR_NS_LONG || (qry->flags.NO_THROTTLE && (score < KR_NS_TIMEOUT)))) { update_nsrep_set(ns, (const knot_dname_t *)k, addr_choice, score); ns->reputation = reputation; - } else { + } else if (score < KR_NS_TIMEOUT) { /* With 10% chance, probe server with a probability given by its RTT / MAX_RTT */ if ((kr_rand_uint(100) < 10) && (kr_rand_uint(KR_NS_MAX_SCORE) >= score)) { /* If this is a low-reliability probe, go with TCP to get ICMP reachability check. */ diff --git a/lib/resolve.c b/lib/resolve.c index c0836201c..759cb86ee 100644 --- a/lib/resolve.c +++ b/lib/resolve.c @@ -863,7 +863,7 @@ static void update_nslist_score(struct kr_request *request, struct kr_query *qry } /* Penalise resolution failures except validation failures. */ } else if (!(qry->flags.DNSSEC_BOGUS)) { - kr_nsrep_update_rtt(&qry->ns, src, KR_NS_TIMEOUT, ctx->cache_rtt, KR_NS_RESET); + kr_nsrep_update_rtt(&qry->ns, src, KR_NS_TIMEOUT, ctx->cache_rtt, KR_NS_UPDATE); WITH_VERBOSE(qry) { char addr_str[INET6_ADDRSTRLEN]; inet_ntop(src->sa_family, kr_inaddr(src), addr_str, sizeof(addr_str)); @@ -1433,8 +1433,12 @@ ns_election: } else { VERBOSE_MSG(qry, "=> no valid NS left\n"); } - ITERATE_LAYERS(request, qry, reset); - kr_rplan_pop(rplan, qry); + if (!qry->flags.NO_NS_FOUND) { + qry->flags.NO_NS_FOUND = true; + } else { + ITERATE_LAYERS(request, qry, reset); + kr_rplan_pop(rplan, qry); + } return KR_STATE_PRODUCE; } } diff --git a/lib/rplan.h b/lib/rplan.h index 0600b3c89..adb67eaeb 100644 --- a/lib/rplan.h +++ b/lib/rplan.h @@ -62,6 +62,7 @@ struct kr_qflags { bool FORWARD : 1; /**< Forward all queries to upstream; validate answers. */ bool DNS64_MARK : 1; /**< Internal mark for dns64 module. */ bool CACHE_TRIED : 1; /**< Internal to cache module. 
*/ + bool NO_NS_FOUND : 1; /**< No valid NS found during last PRODUCE stage. */ }; /** Combine flags together. This means set union for simple flags. */ diff --git a/modules/serve_stale/serve_stale.lua b/modules/serve_stale/serve_stale.lua index c8677ba1e..2c3971a8d 100644 --- a/modules/serve_stale/serve_stale.lua +++ b/modules/serve_stale/serve_stale.lua @@ -26,8 +26,8 @@ M.layer = { local now = ffi.C.kr_now() local deadline = qry.creation_time_mono + M.timeout - if now > deadline then - --log('[ ][stal] => deadline has passed') + if now > deadline or qry.flags.NO_NS_FOUND then + log('[ ][stal] => deadline has passed') qry.stale_cb = M.callback -- TODO: probably start the same request that doesn't stale-serve, -- but first we need some detection of non-interactive / internal requests. -- 2.47.2