From: Diego Fronza Date: Mon, 19 Oct 2020 20:02:03 +0000 (-0300) Subject: Add stale-refresh-time option X-Git-Tag: v9.17.7~5^2~11 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4827ad0ec46e0fb30623363958289494b71de596;p=thirdparty%2Fbind9.git Add stale-refresh-time option Before this update, BIND would attempt to do a full recursive resolution process for each query received if the requested rrset had its ttl expired. If the resolution fails for any reason, only then BIND would check for stale rrset in cache (if 'stale-cache-enable' and 'stale-answer-enable' is on). The problem with this approach is that if an authoritative server is unreachable or is failing to respond, it is very unlikely that the problem will be fixed in the next seconds. A better approach to improve performance in those cases, is to mark the moment in which a resolution failed, and if new queries arrive for that same rrset, try to respond directly from the stale cache, and do that for a window of time configured via 'stale-refresh-time'. Only when this interval expires we then try to do a normal refresh of the rrset. The logic behind this commit is as following: - In query.c / query_gotanswer(), if the test of 'result' variable falls to the default case, an error is assumed to have happened, and a call to 'query_usestale()' is made to check if serving of stale rrset is enabled in configuration. - If serving of stale answers is enabled, a flag will be turned on in the query context to look for stale records: query.c:6839 qctx->client->query.dboptions |= DNS_DBFIND_STALEOK; - A call to query_lookup() will be made again, inside it a call to 'dns_db_findext()' is made, which in turn will invoke rbdb.c / cache_find(). - In rbtdb.c / cache_find() the important bits of this change is the call to 'check_stale_header()', which is a function that yields true if we should skip the stale entry, or false if we should consider it. - In check_stale_header() we now check if the DNS_DBFIND_STALEOK option is set, if that is the case we know that this new search for stale records was made due to a failure in a normal resolution, so we keep track of the time in which the failured occured in rbtdb.c:4559: header->last_refresh_fail_ts = search->now; - In check_stale_header(), if DNS_DBFIND_STALEOK is not set, then we know this is a normal lookup, if the record is stale and the query time is between last failure time + stale-refresh-time window, then we return false so cache_find() knows it can consider this stale rrset entry to return as a response. The last additions are two new methods to the database interface: - setservestale_refresh - getservestale_refresh Those were added so rbtdb can be aware of the value set in configuration option, since in that level we have no access to the view object. --- diff --git a/bin/named/config.c b/bin/named/config.c index 863feae8b35..9b0c6f06e2f 100644 --- a/bin/named/config.c +++ b/bin/named/config.c @@ -196,6 +196,7 @@ options {\n\ servfail-ttl 1;\n\ # sortlist \n\ stale-answer-enable false;\n\ + stale-refresh-time 30; /* 30 seconds */\n\ stale-answer-ttl 1; /* 1 second */\n\ stale-cache-enable false;\n\ synth-from-dnssec no;\n\ diff --git a/bin/named/server.c b/bin/named/server.c index a864c1d8fd9..f83473c1b2f 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -3897,6 +3897,7 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config, size_t max_adb_size; uint32_t lame_ttl, fail_ttl; uint32_t max_stale_ttl = 0; + uint32_t stale_refresh_time = 0; dns_tsig_keyring_t *ring = NULL; dns_view_t *pview = NULL; /* Production view */ isc_mem_t *cmctx = NULL, *hmctx = NULL; @@ -4395,6 +4396,11 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config, view->staleanswersok = dns_stale_answer_conf; } + obj = NULL; + result = named_config_get(maps, "stale-refresh-time", &obj); + INSIST(result == ISC_R_SUCCESS); + stale_refresh_time = cfg_obj_asduration(obj); + /* * Configure the view's cache. * @@ -4529,6 +4535,7 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config, dns_cache_setcachesize(cache, max_cache_size); dns_cache_setservestalettl(cache, max_stale_ttl); + dns_cache_setservestalerefresh(cache, stale_refresh_time); dns_cache_detach(&cache); diff --git a/bin/tests/system/dyndb/driver/db.c b/bin/tests/system/dyndb/driver/db.c index cbeff6172be..77d335e2eac 100644 --- a/bin/tests/system/dyndb/driver/db.c +++ b/bin/tests/system/dyndb/driver/db.c @@ -589,6 +589,8 @@ static dns_dbmethods_t sampledb_methods = { NULL, /* getsize */ NULL, /* setservestalettl */ NULL, /* getservestalettl */ + NULL, /* setservestalerefresh */ + NULL, /* getservestalerefresh */ NULL, /* setgluecachestats */ NULL /* adjusthashsize */ }; diff --git a/lib/dns/cache.c b/lib/dns/cache.c index b8e719951a0..873b825f2ed 100644 --- a/lib/dns/cache.c +++ b/lib/dns/cache.c @@ -999,6 +999,13 @@ dns_cache_getservestalettl(dns_cache_t *cache) { return (result == ISC_R_SUCCESS ? ttl : 0); } +void +dns_cache_setservestalerefresh(dns_cache_t *cache, uint32_t interval) { + REQUIRE(VALID_CACHE(cache)); + + (void)dns_db_setservestalerefresh(cache->db, interval); +} + /* * The cleaner task is shutting down; do the necessary cleanup. */ diff --git a/lib/dns/db.c b/lib/dns/db.c index 6db94d51b86..fa605097da8 100644 --- a/lib/dns/db.c +++ b/lib/dns/db.c @@ -1089,6 +1089,28 @@ dns_db_getservestalettl(dns_db_t *db, dns_ttl_t *ttl) { return (ISC_R_NOTIMPLEMENTED); } +isc_result_t +dns_db_setservestalerefresh(dns_db_t *db, uint32_t interval) { + REQUIRE(DNS_DB_VALID(db)); + REQUIRE((db->attributes & DNS_DBATTR_CACHE) != 0); + + if (db->methods->setservestalerefresh != NULL) { + return ((db->methods->setservestalerefresh)(db, interval)); + } + return (ISC_R_NOTIMPLEMENTED); +} + +isc_result_t +dns_db_getservestalerefresh(dns_db_t *db, uint32_t *interval) { + REQUIRE(DNS_DB_VALID(db)); + REQUIRE((db->attributes & DNS_DBATTR_CACHE) != 0); + + if (db->methods->getservestalerefresh != NULL) { + return ((db->methods->getservestalerefresh)(db, interval)); + } + return (ISC_R_NOTIMPLEMENTED); +} + isc_result_t dns_db_setgluecachestats(dns_db_t *db, isc_stats_t *stats) { REQUIRE(dns_db_iszone(db)); diff --git a/lib/dns/dnsrps.c b/lib/dns/dnsrps.c index 2d261c5c6ac..0f2ffb5f353 100644 --- a/lib/dns/dnsrps.c +++ b/lib/dns/dnsrps.c @@ -967,6 +967,8 @@ static dns_dbmethods_t rpsdb_db_methods = { NULL, /* getsize */ NULL, /* setservestalettl */ NULL, /* getservestalettl */ + NULL, /* setservestalerefresh */ + NULL, /* getservestalerefresh */ NULL, /* setgluecachestats */ NULL /* adjusthashsize */ }; diff --git a/lib/dns/include/dns/cache.h b/lib/dns/include/dns/cache.h index ce39765ebce..0474d78dbdc 100644 --- a/lib/dns/include/dns/cache.h +++ b/lib/dns/include/dns/cache.h @@ -255,6 +255,18 @@ dns_cache_getservestalettl(dns_cache_t *cache); *\li 'cache' to be valid. */ +void +dns_cache_setservestalerefresh(dns_cache_t *cache, uint32_t interval); +/*%< + * Sets the length of time to wait before attempting to refresh a rrset + * if a previous attempt in doing so has failed. + * During this time window if stale rrset are available in cache they + * will be directly returned to client. + * + * Requires: + *\li 'cache' to be valid. + */ + isc_result_t dns_cache_flush(dns_cache_t *cache); /*%< diff --git a/lib/dns/include/dns/db.h b/lib/dns/include/dns/db.h index 395e8e9679d..b79dcae0fa5 100644 --- a/lib/dns/include/dns/db.h +++ b/lib/dns/include/dns/db.h @@ -178,6 +178,8 @@ typedef struct dns_dbmethods { uint64_t *records, uint64_t *bytes); isc_result_t (*setservestalettl)(dns_db_t *db, dns_ttl_t ttl); isc_result_t (*getservestalettl)(dns_db_t *db, dns_ttl_t *ttl); + isc_result_t (*setservestalerefresh)(dns_db_t *db, uint32_t interval); + isc_result_t (*getservestalerefresh)(dns_db_t *db, uint32_t *interval); isc_result_t (*setgluecachestats)(dns_db_t *db, isc_stats_t *stats); isc_result_t (*adjusthashsize)(dns_db_t *db, size_t size); } dns_dbmethods_t; @@ -238,6 +240,7 @@ struct dns_dbonupdatelistener { #define DNS_DBFIND_ADDITIONALOK 0x0100 #define DNS_DBFIND_NOZONECUT 0x0200 #define DNS_DBFIND_STALEOK 0x0400 +#define DNS_DBFIND_STALEENABLED 0x0800 /*@}*/ /*@{*/ @@ -1701,6 +1704,39 @@ dns_db_getservestalettl(dns_db_t *db, dns_ttl_t *ttl); * \li #ISC_R_NOTIMPLEMENTED - Not supported by this DB implementation. */ +isc_result_t +dns_db_setservestalerefresh(dns_db_t *db, uint32_t interval); +/*%< + * Sets the length of time to wait before attempting to refresh a rrset + * if a previous attempt in doing so has failed. + * During this time window if stale rrset are available in cache they + * will be directly returned to client. + * + * Requires: + * \li 'db' is a valid cache database. + * \li 'interval' is number of seconds before attempting to refresh data. + * + * Returns: + * \li #ISC_R_SUCCESS + * \li #ISC_R_NOTIMPLEMENTED - Not supported by this DB implementation. + */ + +isc_result_t +dns_db_getservestalerefresh(dns_db_t *db, uint32_t *interval); +/*%< + * Gets the length of time in which stale answers are directly returned from + * cache before attempting to refresh them, in case a previous attempt in + * doing so has failed. + * + * Requires: + * \li 'db' is a valid cache database. + * \li 'interval' is number of seconds before attempting to refresh data. + * + * Returns: + * \li #ISC_R_SUCCESS + * \li #ISC_R_NOTIMPLEMENTED - Not supported by this DB implementation. + */ + isc_result_t dns_db_setgluecachestats(dns_db_t *db, isc_stats_t *stats); /*%< diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index 6cad7abdd92..f8d51e9a4bb 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -205,6 +205,7 @@ typedef struct rdatasetheader { rbtdb_rdatatype_t type; atomic_uint_least16_t attributes; dns_trust_t trust; + isc_stdtime_t last_refresh_fail_ts; struct noqname *noqname; struct noqname *closest; unsigned int is_mmapped : 1; @@ -488,6 +489,13 @@ struct dns_rbtdb { */ dns_ttl_t serve_stale_ttl; + /* + * The time after a failed lookup, where stale answers from cache + * may be used directly in a DNS response without attempting a + * new iterative lookup. + */ + uint32_t serve_stale_refresh; + /* * This is a linked list used to implement the LRU cache. There will * be node_lock_count linked lists here. Nodes in bucket 1 will be @@ -4547,6 +4555,27 @@ check_stale_header(dns_rbtnode_t *node, rdatasetheader_t *header, stale > search->now) { mark_header_stale(search->rbtdb, header); *header_prev = header; + /* + * If DNS_DBFIND_STALEOK is set then it means we failed + * to resolve the name during recursion, in this case we + * mark the time in which the refresh failed. + */ + if ((search->options & DNS_DBFIND_STALEOK) != 0) { + header->last_refresh_fail_ts = search->now; + } else if ((search->options & + DNS_DBFIND_STALEENABLED) != 0 && + search->now < + (header->last_refresh_fail_ts + + search->rbtdb->serve_stale_refresh)) + { + /* + * If we are within interval between last + * refresh failure time + 'stale-refresh-time', + * then don't skip this stale entry but use it + * instead. + */ + return (false); + } return ((search->options & DNS_DBFIND_STALEOK) == 0); } @@ -8379,6 +8408,29 @@ getservestalettl(dns_db_t *db, dns_ttl_t *ttl) { return (ISC_R_SUCCESS); } +static isc_result_t +setservestalerefresh(dns_db_t *db, uint32_t interval) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); + + /* currently no bounds checking. 0 means disable. */ + rbtdb->serve_stale_refresh = interval; + return (ISC_R_SUCCESS); +} + +static isc_result_t +getservestalerefresh(dns_db_t *db, uint32_t *interval) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); + + *interval = rbtdb->serve_stale_refresh; + return (ISC_R_SUCCESS); +} + static dns_dbmethods_t zone_methods = { attach, detach, beginload, @@ -8426,6 +8478,8 @@ static dns_dbmethods_t zone_methods = { attach, getsize, NULL, /* setservestalettl */ NULL, /* getservestalettl */ + NULL, /* setservestalerefresh */ + NULL, /* getservestalerefresh */ setgluecachestats, adjusthashsize }; @@ -8476,6 +8530,8 @@ static dns_dbmethods_t cache_methods = { attach, NULL, /* getsize */ setservestalettl, getservestalettl, + setservestalerefresh, + getservestalerefresh, NULL, adjusthashsize }; diff --git a/lib/dns/sdb.c b/lib/dns/sdb.c index d525963f52a..d9de4224097 100644 --- a/lib/dns/sdb.c +++ b/lib/dns/sdb.c @@ -1309,6 +1309,8 @@ static dns_dbmethods_t sdb_methods = { NULL, /* getsize */ NULL, /* setservestalettl */ NULL, /* getservestalettl */ + NULL, /* setservestalerefresh */ + NULL, /* getservestalerefresh */ NULL, /* setgluecachestats */ NULL /* adjusthashsize */ }; diff --git a/lib/dns/sdlz.c b/lib/dns/sdlz.c index 14b55eb074b..c8a615a0f3d 100644 --- a/lib/dns/sdlz.c +++ b/lib/dns/sdlz.c @@ -1281,6 +1281,8 @@ static dns_dbmethods_t sdlzdb_methods = { NULL, /* getsize */ NULL, /* setservestalettl */ NULL, /* getservestalettl */ + NULL, /* setservestalerefresh */ + NULL, /* getservestalerefresh */ NULL, /* setgluecachestats */ NULL /* adjusthashsize */ }; diff --git a/lib/dns/win32/libdns.def.in b/lib/dns/win32/libdns.def.in index 6567fa4cedd..63d9530f050 100644 --- a/lib/dns/win32/libdns.def.in +++ b/lib/dns/win32/libdns.def.in @@ -82,6 +82,7 @@ dns_cache_flushname dns_cache_flushnode dns_cache_getcachesize dns_cache_getname +dns_cache_getservestalerefresh dns_cache_getservestalettl dns_cache_getstats dns_cache_load @@ -93,6 +94,7 @@ dns_cache_renderxml @END LIBXML2 dns_cache_setcachesize dns_cache_setfilename +dns_cache_setservestalerefresh dns_cache_setservestalettl dns_cache_updatestats dns_catz_add_zone @@ -198,6 +200,7 @@ dns_db_getnsec3parameters dns_db_getoriginnode dns_db_getrrsetstats dns_db_getservestalettl +dns_db_getservestalerefresh dns_db_getsigningtime dns_db_getsize dns_db_getsoaserial @@ -223,6 +226,7 @@ dns_db_serialize dns_db_setcachestats dns_db_setgluecachestats dns_db_setservestalettl +dns_db_setservestalerefresh dns_db_setsigningtime dns_db_settask dns_db_subtractrdataset diff --git a/lib/isccfg/namedconf.c b/lib/isccfg/namedconf.c index 23cf73f477b..7551e1d8cfd 100644 --- a/lib/isccfg/namedconf.c +++ b/lib/isccfg/namedconf.c @@ -2051,6 +2051,7 @@ static cfg_clausedef_t view_clauses[] = { { "stale-answer-enable", &cfg_type_boolean, 0 }, { "stale-answer-ttl", &cfg_type_duration, 0 }, { "stale-cache-enable", &cfg_type_boolean, 0 }, + { "stale-refresh-time", &cfg_type_duration, 0 }, { "suppress-initial-notify", &cfg_type_boolean, CFG_CLAUSEFLAG_NYI }, { "synth-from-dnssec", &cfg_type_boolean, 0 }, { "topology", &cfg_type_bracketed_aml, CFG_CLAUSEFLAG_ANCIENT }, diff --git a/lib/ns/query.c b/lib/ns/query.c index fd4c7fb9c79..33c7b6a2db2 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -5523,6 +5523,9 @@ query_lookup(query_ctx_t *qctx) { dns_clientinfo_t ci; dns_name_t *rpzqname = NULL; unsigned int dboptions; + dns_ttl_t stale_ttl = 0; + dns_ttl_t stale_refresh = 0; + bool dbfind_stale = false; CCTRACE(ISC_LOG_DEBUG(3), "query_lookup"); @@ -5581,6 +5584,22 @@ query_lookup(query_ctx_t *qctx) { dboptions |= DNS_DBFIND_COVERINGNSEC; } + dns_db_getservestalerefresh(qctx->client->view->cachedb, + &stale_refresh); + dns_db_getservestalettl(qctx->client->view->cachedb, &stale_ttl); + if (stale_refresh > 0) { + if (qctx->client->view->staleanswersok == dns_stale_answer_yes) + { + dboptions |= DNS_DBFIND_STALEENABLED; + } else if (qctx->client->view->staleanswersok == + dns_stale_answer_conf) { + if (qctx->client->view->staleanswersenable && + stale_ttl > 0) { + dboptions |= DNS_DBFIND_STALEENABLED; + } + } + } + result = dns_db_findext(qctx->db, rpzqname, qctx->version, qctx->type, dboptions, qctx->client->now, &qctx->node, qctx->fname, &cm, &ci, qctx->rdataset, @@ -5601,10 +5620,28 @@ query_lookup(query_ctx_t *qctx) { dns_cache_updatestats(qctx->view->cache, result); } - if ((qctx->client->query.dboptions & DNS_DBFIND_STALEOK) != 0) { + /* + * If DNS_DBFIND_STALEOK is set this means we are dealing with a + * lookup following a failed lookup and it is okay to serve a stale + * answer. This will start a time window in rbtdb, tracking the last + * time the RRset lookup failed. + * + * A stale answer may also be served if this is a normal lookup, + * the view has enabled serve-stale (DNS_DBFIND_STALE_ENABLED is set), + * and the request is within the stale-refresh-time window. If this + * is the case we have to make sure that the lookup found a stale + * answer, otherwise "fresh" answers are also treated as stale. + */ + dbfind_stale = ((dboptions & DNS_DBFIND_STALEOK) != 0); + if (dbfind_stale != 0 || + (((dboptions & DNS_DBFIND_STALEENABLED) != 0) && + STALE(qctx->rdataset))) + { char namebuf[DNS_NAME_FORMATSIZE]; bool success; + inc_stats(qctx->client, ns_statscounter_trystale); + qctx->client->query.dboptions &= ~DNS_DBFIND_STALEOK; if (dns_rdataset_isassociated(qctx->rdataset) && dns_rdataset_count(qctx->rdataset) > 0 && @@ -5618,10 +5655,20 @@ query_lookup(query_ctx_t *qctx) { dns_name_format(qctx->client->query.qname, namebuf, sizeof(namebuf)); - isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE, - NS_LOGMODULE_QUERY, ISC_LOG_INFO, - "%s resolver failure, stale answer %s", namebuf, - success ? "used" : "unavailable"); + if (dbfind_stale) { + isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE, + NS_LOGMODULE_QUERY, ISC_LOG_INFO, + "%s resolver failure, stale answer %s", + namebuf, + success ? "used" : "unavailable"); + } else { + isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE, + NS_LOGMODULE_QUERY, ISC_LOG_INFO, + "%s query within stale refresh time, " + "stale answer %s", + namebuf, + success ? "used" : "unavailable"); + } if (!success) { QUERY_ERROR(qctx, DNS_R_SERVFAIL); @@ -6833,7 +6880,6 @@ query_usestale(query_ctx_t *qctx) { if (staleanswersok) { qctx->client->query.dboptions |= DNS_DBFIND_STALEOK; - inc_stats(qctx->client, ns_statscounter_trystale); if (qctx->client->query.fetch != NULL) { dns_resolver_destroyfetch(&qctx->client->query.fetch); }