]> git.ipfire.org Git - thirdparty/bind9.git/commitdiff
Add stale-refresh-time option
authorDiego Fronza <diego@isc.org>
Mon, 19 Oct 2020 20:02:03 +0000 (17:02 -0300)
committerDiego Fronza <diego@isc.org>
Wed, 11 Nov 2020 15:53:23 +0000 (12:53 -0300)
Before this update, BIND would attempt to do a full recursive resolution
process for each query received if the requested rrset had its ttl
expired. If the resolution fails for any reason, only then BIND would
check for stale rrset in cache (if 'stale-cache-enable' and
'stale-answer-enable' is on).

The problem with this approach is that if an authoritative server is
unreachable or is failing to respond, it is very unlikely that the
problem will be fixed in the next seconds.

A better approach to improve performance in those cases, is to mark the
moment in which a resolution failed, and if new queries arrive for that
same rrset, try to respond directly from the stale cache, and do that
for a window of time configured via 'stale-refresh-time'.

Only when this interval expires we then try to do a normal refresh of
the rrset.

The logic behind this commit is as following:

- In query.c / query_gotanswer(), if the test of 'result' variable falls
  to the default case, an error is assumed to have happened, and a call
  to 'query_usestale()' is made to check if serving of stale rrset is
  enabled in configuration.

- If serving of stale answers is enabled, a flag will be turned on in
  the query context to look for stale records:
  query.c:6839
  qctx->client->query.dboptions |= DNS_DBFIND_STALEOK;

- A call to query_lookup() will be made again, inside it a call to
  'dns_db_findext()' is made, which in turn will invoke rbdb.c /
  cache_find().

- In rbtdb.c / cache_find() the important bits of this change is the
  call to 'check_stale_header()', which is a function that yields true
  if we should skip the stale entry, or false if we should consider it.

- In check_stale_header() we now check if the DNS_DBFIND_STALEOK option
  is set, if that is the case we know that this new search for stale
  records was made due to a failure in a normal resolution, so we keep
  track of the time in which the failured occured in rbtdb.c:4559:
  header->last_refresh_fail_ts = search->now;

- In check_stale_header(), if DNS_DBFIND_STALEOK is not set, then we
  know this is a normal lookup, if the record is stale and the query
  time is between last failure time + stale-refresh-time window, then
  we return false so cache_find() knows it can consider this stale
  rrset entry to return as a response.

The last additions are two new methods to the database interface:
- setservestale_refresh
- getservestale_refresh

Those were added so rbtdb can be aware of the value set in configuration
option, since in that level we have no access to the view object.

14 files changed:
bin/named/config.c
bin/named/server.c
bin/tests/system/dyndb/driver/db.c
lib/dns/cache.c
lib/dns/db.c
lib/dns/dnsrps.c
lib/dns/include/dns/cache.h
lib/dns/include/dns/db.h
lib/dns/rbtdb.c
lib/dns/sdb.c
lib/dns/sdlz.c
lib/dns/win32/libdns.def.in
lib/isccfg/namedconf.c
lib/ns/query.c

index 863feae8b35a2604072f3f9a80ef3d6207cd1380..9b0c6f06e2f99621950099a4803d6dee9ebc4bbe 100644 (file)
@@ -196,6 +196,7 @@ options {\n\
        servfail-ttl 1;\n\
 #      sortlist <none>\n\
        stale-answer-enable false;\n\
+       stale-refresh-time 30; /* 30 seconds */\n\
        stale-answer-ttl 1; /* 1 second */\n\
        stale-cache-enable false;\n\
        synth-from-dnssec no;\n\
index a864c1d8fd902e9e60bbf9b8c409c7fc76d37cbd..f83473c1b2fde55117001b263e5266063020c017 100644 (file)
@@ -3897,6 +3897,7 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config,
        size_t max_adb_size;
        uint32_t lame_ttl, fail_ttl;
        uint32_t max_stale_ttl = 0;
+       uint32_t stale_refresh_time = 0;
        dns_tsig_keyring_t *ring = NULL;
        dns_view_t *pview = NULL; /* Production view */
        isc_mem_t *cmctx = NULL, *hmctx = NULL;
@@ -4395,6 +4396,11 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config,
                view->staleanswersok = dns_stale_answer_conf;
        }
 
+       obj = NULL;
+       result = named_config_get(maps, "stale-refresh-time", &obj);
+       INSIST(result == ISC_R_SUCCESS);
+       stale_refresh_time = cfg_obj_asduration(obj);
+
        /*
         * Configure the view's cache.
         *
@@ -4529,6 +4535,7 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config,
 
        dns_cache_setcachesize(cache, max_cache_size);
        dns_cache_setservestalettl(cache, max_stale_ttl);
+       dns_cache_setservestalerefresh(cache, stale_refresh_time);
 
        dns_cache_detach(&cache);
 
index cbeff6172be9cdfe6a34c65d410ce59aa7511833..77d335e2eac4ecda74823e70256657f3e6b1d461 100644 (file)
@@ -589,6 +589,8 @@ static dns_dbmethods_t sampledb_methods = {
        NULL, /* getsize */
        NULL, /* setservestalettl */
        NULL, /* getservestalettl */
+       NULL, /* setservestalerefresh */
+       NULL, /* getservestalerefresh */
        NULL, /* setgluecachestats */
        NULL  /* adjusthashsize */
 };
index b8e719951a081ad310a9ea61fef13396af8b285d..873b825f2edc89f2f2808b8e086d9d9c7becf490 100644 (file)
@@ -999,6 +999,13 @@ dns_cache_getservestalettl(dns_cache_t *cache) {
        return (result == ISC_R_SUCCESS ? ttl : 0);
 }
 
+void
+dns_cache_setservestalerefresh(dns_cache_t *cache, uint32_t interval) {
+       REQUIRE(VALID_CACHE(cache));
+
+       (void)dns_db_setservestalerefresh(cache->db, interval);
+}
+
 /*
  * The cleaner task is shutting down; do the necessary cleanup.
  */
index 6db94d51b86b254a22339dc5049e996d7d2d4b5d..fa605097da8472abe952a8f619772aac7d61fba3 100644 (file)
@@ -1089,6 +1089,28 @@ dns_db_getservestalettl(dns_db_t *db, dns_ttl_t *ttl) {
        return (ISC_R_NOTIMPLEMENTED);
 }
 
+isc_result_t
+dns_db_setservestalerefresh(dns_db_t *db, uint32_t interval) {
+       REQUIRE(DNS_DB_VALID(db));
+       REQUIRE((db->attributes & DNS_DBATTR_CACHE) != 0);
+
+       if (db->methods->setservestalerefresh != NULL) {
+               return ((db->methods->setservestalerefresh)(db, interval));
+       }
+       return (ISC_R_NOTIMPLEMENTED);
+}
+
+isc_result_t
+dns_db_getservestalerefresh(dns_db_t *db, uint32_t *interval) {
+       REQUIRE(DNS_DB_VALID(db));
+       REQUIRE((db->attributes & DNS_DBATTR_CACHE) != 0);
+
+       if (db->methods->getservestalerefresh != NULL) {
+               return ((db->methods->getservestalerefresh)(db, interval));
+       }
+       return (ISC_R_NOTIMPLEMENTED);
+}
+
 isc_result_t
 dns_db_setgluecachestats(dns_db_t *db, isc_stats_t *stats) {
        REQUIRE(dns_db_iszone(db));
index 2d261c5c6ac8c9b15f3fd66e5696bfe338cf442b..0f2ffb5f353da90caf070543ae32d451c9c7001c 100644 (file)
@@ -967,6 +967,8 @@ static dns_dbmethods_t rpsdb_db_methods = {
        NULL, /* getsize */
        NULL, /* setservestalettl */
        NULL, /* getservestalettl */
+       NULL, /* setservestalerefresh */
+       NULL, /* getservestalerefresh */
        NULL, /* setgluecachestats */
        NULL  /* adjusthashsize */
 };
index ce39765ebce0e7f4e472427b07b8f67d3c5c6507..0474d78dbdce66b7d71b23f7cb57cf12ea367906 100644 (file)
@@ -255,6 +255,18 @@ dns_cache_getservestalettl(dns_cache_t *cache);
  *\li  'cache' to be valid.
  */
 
+void
+dns_cache_setservestalerefresh(dns_cache_t *cache, uint32_t interval);
+/*%<
+ * Sets the length of time to wait before attempting to refresh a rrset
+ * if a previous attempt in doing so has failed.
+ * During this time window if stale rrset are available in cache they
+ * will be directly returned to client.
+ *
+ * Requires:
+ *\li  'cache' to be valid.
+ */
+
 isc_result_t
 dns_cache_flush(dns_cache_t *cache);
 /*%<
index 395e8e9679dbed2d2926e9c4b6a2c857631603cf..b79dcae0fa5f8b5d63518664004ea5716ab39fea 100644 (file)
@@ -178,6 +178,8 @@ typedef struct dns_dbmethods {
                                uint64_t *records, uint64_t *bytes);
        isc_result_t (*setservestalettl)(dns_db_t *db, dns_ttl_t ttl);
        isc_result_t (*getservestalettl)(dns_db_t *db, dns_ttl_t *ttl);
+       isc_result_t (*setservestalerefresh)(dns_db_t *db, uint32_t interval);
+       isc_result_t (*getservestalerefresh)(dns_db_t *db, uint32_t *interval);
        isc_result_t (*setgluecachestats)(dns_db_t *db, isc_stats_t *stats);
        isc_result_t (*adjusthashsize)(dns_db_t *db, size_t size);
 } dns_dbmethods_t;
@@ -238,6 +240,7 @@ struct dns_dbonupdatelistener {
 #define DNS_DBFIND_ADDITIONALOK 0x0100
 #define DNS_DBFIND_NOZONECUT   0x0200
 #define DNS_DBFIND_STALEOK     0x0400
+#define DNS_DBFIND_STALEENABLED 0x0800
 /*@}*/
 
 /*@{*/
@@ -1701,6 +1704,39 @@ dns_db_getservestalettl(dns_db_t *db, dns_ttl_t *ttl);
  * \li #ISC_R_NOTIMPLEMENTED - Not supported by this DB implementation.
  */
 
+isc_result_t
+dns_db_setservestalerefresh(dns_db_t *db, uint32_t interval);
+/*%<
+ * Sets the length of time to wait before attempting to refresh a rrset
+ * if a previous attempt in doing so has failed.
+ * During this time window if stale rrset are available in cache they
+ * will be directly returned to client.
+ *
+ * Requires:
+ * \li 'db' is a valid cache database.
+ * \li 'interval' is number of seconds before attempting to refresh data.
+ *
+ * Returns:
+ * \li #ISC_R_SUCCESS
+ * \li #ISC_R_NOTIMPLEMENTED - Not supported by this DB implementation.
+ */
+
+isc_result_t
+dns_db_getservestalerefresh(dns_db_t *db, uint32_t *interval);
+/*%<
+ * Gets the length of time in which stale answers are directly returned from
+ * cache before attempting to refresh them, in case a previous attempt in
+ * doing so has failed.
+ *
+ * Requires:
+ * \li 'db' is a valid cache database.
+ * \li 'interval' is number of seconds before attempting to refresh data.
+ *
+ * Returns:
+ * \li #ISC_R_SUCCESS
+ * \li #ISC_R_NOTIMPLEMENTED - Not supported by this DB implementation.
+ */
+
 isc_result_t
 dns_db_setgluecachestats(dns_db_t *db, isc_stats_t *stats);
 /*%<
index 6cad7abdd9234a163d9dae8c3bad609905ed4112..f8d51e9a4bb2c54c1c55f93cbf2658350746218a 100644 (file)
@@ -205,6 +205,7 @@ typedef struct rdatasetheader {
        rbtdb_rdatatype_t type;
        atomic_uint_least16_t attributes;
        dns_trust_t trust;
+       isc_stdtime_t last_refresh_fail_ts;
        struct noqname *noqname;
        struct noqname *closest;
        unsigned int is_mmapped : 1;
@@ -488,6 +489,13 @@ struct dns_rbtdb {
         */
        dns_ttl_t serve_stale_ttl;
 
+       /*
+        * The time after a failed lookup, where stale answers from cache
+        * may be used directly in a DNS response without attempting a
+        * new iterative lookup.
+        */
+       uint32_t serve_stale_refresh;
+
        /*
         * This is a linked list used to implement the LRU cache.  There will
         * be node_lock_count linked lists here.  Nodes in bucket 1 will be
@@ -4547,6 +4555,27 @@ check_stale_header(dns_rbtnode_t *node, rdatasetheader_t *header,
                    stale > search->now) {
                        mark_header_stale(search->rbtdb, header);
                        *header_prev = header;
+                       /*
+                        * If DNS_DBFIND_STALEOK is set then it means we failed
+                        * to resolve the name during recursion, in this case we
+                        * mark the time in which the refresh failed.
+                        */
+                       if ((search->options & DNS_DBFIND_STALEOK) != 0) {
+                               header->last_refresh_fail_ts = search->now;
+                       } else if ((search->options &
+                                   DNS_DBFIND_STALEENABLED) != 0 &&
+                                  search->now <
+                                          (header->last_refresh_fail_ts +
+                                           search->rbtdb->serve_stale_refresh))
+                       {
+                               /*
+                                * If we are within interval between last
+                                * refresh failure time + 'stale-refresh-time',
+                                * then don't skip this stale entry but use it
+                                * instead.
+                                */
+                               return (false);
+                       }
                        return ((search->options & DNS_DBFIND_STALEOK) == 0);
                }
 
@@ -8379,6 +8408,29 @@ getservestalettl(dns_db_t *db, dns_ttl_t *ttl) {
        return (ISC_R_SUCCESS);
 }
 
+static isc_result_t
+setservestalerefresh(dns_db_t *db, uint32_t interval) {
+       dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
+
+       REQUIRE(VALID_RBTDB(rbtdb));
+       REQUIRE(IS_CACHE(rbtdb));
+
+       /* currently no bounds checking.  0 means disable. */
+       rbtdb->serve_stale_refresh = interval;
+       return (ISC_R_SUCCESS);
+}
+
+static isc_result_t
+getservestalerefresh(dns_db_t *db, uint32_t *interval) {
+       dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
+
+       REQUIRE(VALID_RBTDB(rbtdb));
+       REQUIRE(IS_CACHE(rbtdb));
+
+       *interval = rbtdb->serve_stale_refresh;
+       return (ISC_R_SUCCESS);
+}
+
 static dns_dbmethods_t zone_methods = { attach,
                                        detach,
                                        beginload,
@@ -8426,6 +8478,8 @@ static dns_dbmethods_t zone_methods = { attach,
                                        getsize,
                                        NULL, /* setservestalettl */
                                        NULL, /* getservestalettl */
+                                       NULL, /* setservestalerefresh */
+                                       NULL, /* getservestalerefresh */
                                        setgluecachestats,
                                        adjusthashsize };
 
@@ -8476,6 +8530,8 @@ static dns_dbmethods_t cache_methods = { attach,
                                         NULL, /* getsize */
                                         setservestalettl,
                                         getservestalettl,
+                                        setservestalerefresh,
+                                        getservestalerefresh,
                                         NULL,
                                         adjusthashsize };
 
index d525963f52a91a20ac1ba03ef0288bf0f7eb4743..d9de42240977603e9e2d7438fe9799df010a2c10 100644 (file)
@@ -1309,6 +1309,8 @@ static dns_dbmethods_t sdb_methods = {
        NULL, /* getsize */
        NULL, /* setservestalettl */
        NULL, /* getservestalettl */
+       NULL, /* setservestalerefresh */
+       NULL, /* getservestalerefresh */
        NULL, /* setgluecachestats */
        NULL  /* adjusthashsize */
 };
index 14b55eb074bcd26da519a184ccd2569185eb14ec..c8a615a0f3d6020e99607dd23c2af0f7fdc85ca0 100644 (file)
@@ -1281,6 +1281,8 @@ static dns_dbmethods_t sdlzdb_methods = {
        NULL, /* getsize */
        NULL, /* setservestalettl */
        NULL, /* getservestalettl */
+       NULL, /* setservestalerefresh */
+       NULL, /* getservestalerefresh */
        NULL, /* setgluecachestats */
        NULL  /* adjusthashsize */
 };
index 6567fa4ceddadb4a9580c31452cea64507db29f0..63d9530f0500d7e8d9a612a3e0be5011bb2da8f5 100644 (file)
@@ -82,6 +82,7 @@ dns_cache_flushname
 dns_cache_flushnode
 dns_cache_getcachesize
 dns_cache_getname
+dns_cache_getservestalerefresh
 dns_cache_getservestalettl
 dns_cache_getstats
 dns_cache_load
@@ -93,6 +94,7 @@ dns_cache_renderxml
 @END LIBXML2
 dns_cache_setcachesize
 dns_cache_setfilename
+dns_cache_setservestalerefresh
 dns_cache_setservestalettl
 dns_cache_updatestats
 dns_catz_add_zone
@@ -198,6 +200,7 @@ dns_db_getnsec3parameters
 dns_db_getoriginnode
 dns_db_getrrsetstats
 dns_db_getservestalettl
+dns_db_getservestalerefresh
 dns_db_getsigningtime
 dns_db_getsize
 dns_db_getsoaserial
@@ -223,6 +226,7 @@ dns_db_serialize
 dns_db_setcachestats
 dns_db_setgluecachestats
 dns_db_setservestalettl
+dns_db_setservestalerefresh
 dns_db_setsigningtime
 dns_db_settask
 dns_db_subtractrdataset
index 23cf73f477b1a89361c76bd7b3cd838b01276090..7551e1d8cfdacc1e3f83be1812540f0b9c983489 100644 (file)
@@ -2051,6 +2051,7 @@ static cfg_clausedef_t view_clauses[] = {
        { "stale-answer-enable", &cfg_type_boolean, 0 },
        { "stale-answer-ttl", &cfg_type_duration, 0 },
        { "stale-cache-enable", &cfg_type_boolean, 0 },
+       { "stale-refresh-time", &cfg_type_duration, 0 },
        { "suppress-initial-notify", &cfg_type_boolean, CFG_CLAUSEFLAG_NYI },
        { "synth-from-dnssec", &cfg_type_boolean, 0 },
        { "topology", &cfg_type_bracketed_aml, CFG_CLAUSEFLAG_ANCIENT },
index fd4c7fb9c795b10cea5ce27eff1506d6f088985a..33c7b6a2db2d103b4db4235c2d65b1bcea548117 100644 (file)
@@ -5523,6 +5523,9 @@ query_lookup(query_ctx_t *qctx) {
        dns_clientinfo_t ci;
        dns_name_t *rpzqname = NULL;
        unsigned int dboptions;
+       dns_ttl_t stale_ttl = 0;
+       dns_ttl_t stale_refresh = 0;
+       bool dbfind_stale = false;
 
        CCTRACE(ISC_LOG_DEBUG(3), "query_lookup");
 
@@ -5581,6 +5584,22 @@ query_lookup(query_ctx_t *qctx) {
                dboptions |= DNS_DBFIND_COVERINGNSEC;
        }
 
+       dns_db_getservestalerefresh(qctx->client->view->cachedb,
+                                   &stale_refresh);
+       dns_db_getservestalettl(qctx->client->view->cachedb, &stale_ttl);
+       if (stale_refresh > 0) {
+               if (qctx->client->view->staleanswersok == dns_stale_answer_yes)
+               {
+                       dboptions |= DNS_DBFIND_STALEENABLED;
+               } else if (qctx->client->view->staleanswersok ==
+                          dns_stale_answer_conf) {
+                       if (qctx->client->view->staleanswersenable &&
+                           stale_ttl > 0) {
+                               dboptions |= DNS_DBFIND_STALEENABLED;
+                       }
+               }
+       }
+
        result = dns_db_findext(qctx->db, rpzqname, qctx->version, qctx->type,
                                dboptions, qctx->client->now, &qctx->node,
                                qctx->fname, &cm, &ci, qctx->rdataset,
@@ -5601,10 +5620,28 @@ query_lookup(query_ctx_t *qctx) {
                dns_cache_updatestats(qctx->view->cache, result);
        }
 
-       if ((qctx->client->query.dboptions & DNS_DBFIND_STALEOK) != 0) {
+       /*
+        * If DNS_DBFIND_STALEOK is set this means we are dealing with a
+        * lookup following a failed lookup and it is okay to serve a stale
+        * answer. This will start a time window in rbtdb, tracking the last
+        * time the RRset lookup failed.
+        *
+        * A stale answer may also be served if this is a normal lookup,
+        * the view has enabled serve-stale (DNS_DBFIND_STALE_ENABLED is set),
+        * and the request is within the stale-refresh-time window. If this
+        * is the case we have to make sure that the lookup found a stale
+        * answer, otherwise "fresh" answers are also treated as stale.
+        */
+       dbfind_stale = ((dboptions & DNS_DBFIND_STALEOK) != 0);
+       if (dbfind_stale != 0 ||
+           (((dboptions & DNS_DBFIND_STALEENABLED) != 0) &&
+            STALE(qctx->rdataset)))
+       {
                char namebuf[DNS_NAME_FORMATSIZE];
                bool success;
 
+               inc_stats(qctx->client, ns_statscounter_trystale);
+
                qctx->client->query.dboptions &= ~DNS_DBFIND_STALEOK;
                if (dns_rdataset_isassociated(qctx->rdataset) &&
                    dns_rdataset_count(qctx->rdataset) > 0 &&
@@ -5618,10 +5655,20 @@ query_lookup(query_ctx_t *qctx) {
 
                dns_name_format(qctx->client->query.qname, namebuf,
                                sizeof(namebuf));
-               isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE,
-                             NS_LOGMODULE_QUERY, ISC_LOG_INFO,
-                             "%s resolver failure, stale answer %s", namebuf,
-                             success ? "used" : "unavailable");
+               if (dbfind_stale) {
+                       isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE,
+                                     NS_LOGMODULE_QUERY, ISC_LOG_INFO,
+                                     "%s resolver failure, stale answer %s",
+                                     namebuf,
+                                     success ? "used" : "unavailable");
+               } else {
+                       isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE,
+                                     NS_LOGMODULE_QUERY, ISC_LOG_INFO,
+                                     "%s query within stale refresh time, "
+                                     "stale answer %s",
+                                     namebuf,
+                                     success ? "used" : "unavailable");
+               }
 
                if (!success) {
                        QUERY_ERROR(qctx, DNS_R_SERVFAIL);
@@ -6833,7 +6880,6 @@ query_usestale(query_ctx_t *qctx) {
 
        if (staleanswersok) {
                qctx->client->query.dboptions |= DNS_DBFIND_STALEOK;
-               inc_stats(qctx->client, ns_statscounter_trystale);
                if (qctx->client->query.fetch != NULL) {
                        dns_resolver_destroyfetch(&qctx->client->query.fetch);
                }