]> git.ipfire.org Git - thirdparty/bind9.git/commitdiff
Add stale-refresh-time option
authorDiego Fronza <diego@isc.org>
Mon, 19 Oct 2020 20:02:03 +0000 (17:02 -0300)
committerDiego Fronza <diego@isc.org>
Wed, 11 Nov 2020 18:59:56 +0000 (15:59 -0300)
Before this update, BIND would attempt to do a full recursive resolution
process for each query received if the requested rrset had its ttl
expired. If the resolution fails for any reason, only then BIND would
check for stale rrset in cache (if 'stale-cache-enable' and
'stale-answer-enable' is on).

The problem with this approach is that if an authoritative server is
unreachable or is failing to respond, it is very unlikely that the
problem will be fixed in the next seconds.

A better approach to improve performance in those cases, is to mark the
moment in which a resolution failed, and if new queries arrive for that
same rrset, try to respond directly from the stale cache, and do that
for a window of time configured via 'stale-refresh-time'.

Only when this interval expires we then try to do a normal refresh of
the rrset.

The logic behind this commit is as following:

- In query.c / query_gotanswer(), if the test of 'result' variable falls
  to the default case, an error is assumed to have happened, and a call
  to 'query_usestale()' is made to check if serving of stale rrset is
  enabled in configuration.

- If serving of stale answers is enabled, a flag will be turned on in
  the query context to look for stale records:
  query.c:6839
  qctx->client->query.dboptions |= DNS_DBFIND_STALEOK;

- A call to query_lookup() will be made again, inside it a call to
  'dns_db_findext()' is made, which in turn will invoke rbdb.c /
  cache_find().

- In rbtdb.c / cache_find() the important bits of this change is the
  call to 'check_stale_header()', which is a function that yields true
  if we should skip the stale entry, or false if we should consider it.

- In check_stale_header() we now check if the DNS_DBFIND_STALEOK option
  is set, if that is the case we know that this new search for stale
  records was made due to a failure in a normal resolution, so we keep
  track of the time in which the failured occured in rbtdb.c:4559:
  header->last_refresh_fail_ts = search->now;

- In check_stale_header(), if DNS_DBFIND_STALEOK is not set, then we
  know this is a normal lookup, if the record is stale and the query
  time is between last failure time + stale-refresh-time window, then
  we return false so cache_find() knows it can consider this stale
  rrset entry to return as a response.

The last additions are two new methods to the database interface:
- setservestale_refresh
- getservestale_refresh

Those were added so rbtdb can be aware of the value set in configuration
option, since in that level we have no access to the view object.

14 files changed:
bin/named/config.c
bin/named/server.c
bin/tests/system/dyndb/driver/db.c
lib/dns/cache.c
lib/dns/db.c
lib/dns/dnsrps.c
lib/dns/include/dns/cache.h
lib/dns/include/dns/db.h
lib/dns/rbtdb.c
lib/dns/sdb.c
lib/dns/sdlz.c
lib/dns/win32/libdns.def.in
lib/isccfg/namedconf.c
lib/ns/query.c

index 55e1f78f37606372176401bb66dc3387e8c308c2..b54a46bb361210d071e24cb3dadfac5989114027 100644 (file)
@@ -194,6 +194,7 @@ options {\n\
        servfail-ttl 1;\n\
 #      sortlist <none>\n\
        stale-answer-enable false;\n\
+       stale-refresh-time 30; /* 30 seconds */\n\
        stale-answer-ttl 1; /* 1 second */\n\
        stale-cache-enable true;\n\
        synth-from-dnssec no;\n\
index 9b8f1b1e1a83ceca75764a37fe3014b3dfc81e99..57ed2a272ee5463454e44cd16093345a1ca8c71c 100644 (file)
@@ -3895,6 +3895,7 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config,
        size_t max_adb_size;
        uint32_t lame_ttl, fail_ttl;
        uint32_t max_stale_ttl = 0;
+       uint32_t stale_refresh_time = 0;
        dns_tsig_keyring_t *ring = NULL;
        dns_view_t *pview = NULL; /* Production view */
        isc_mem_t *cmctx = NULL, *hmctx = NULL;
@@ -4393,6 +4394,11 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config,
                view->staleanswersok = dns_stale_answer_conf;
        }
 
+       obj = NULL;
+       result = named_config_get(maps, "stale-refresh-time", &obj);
+       INSIST(result == ISC_R_SUCCESS);
+       stale_refresh_time = cfg_obj_asduration(obj);
+
        /*
         * Configure the view's cache.
         *
@@ -4527,6 +4533,7 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config,
 
        dns_cache_setcachesize(cache, max_cache_size);
        dns_cache_setservestalettl(cache, max_stale_ttl);
+       dns_cache_setservestalerefresh(cache, stale_refresh_time);
 
        dns_cache_detach(&cache);
 
index cbeff6172be9cdfe6a34c65d410ce59aa7511833..77d335e2eac4ecda74823e70256657f3e6b1d461 100644 (file)
@@ -589,6 +589,8 @@ static dns_dbmethods_t sampledb_methods = {
        NULL, /* getsize */
        NULL, /* setservestalettl */
        NULL, /* getservestalettl */
+       NULL, /* setservestalerefresh */
+       NULL, /* getservestalerefresh */
        NULL, /* setgluecachestats */
        NULL  /* adjusthashsize */
 };
index b8e719951a081ad310a9ea61fef13396af8b285d..873b825f2edc89f2f2808b8e086d9d9c7becf490 100644 (file)
@@ -999,6 +999,13 @@ dns_cache_getservestalettl(dns_cache_t *cache) {
        return (result == ISC_R_SUCCESS ? ttl : 0);
 }
 
+void
+dns_cache_setservestalerefresh(dns_cache_t *cache, uint32_t interval) {
+       REQUIRE(VALID_CACHE(cache));
+
+       (void)dns_db_setservestalerefresh(cache->db, interval);
+}
+
 /*
  * The cleaner task is shutting down; do the necessary cleanup.
  */
index 6db94d51b86b254a22339dc5049e996d7d2d4b5d..fa605097da8472abe952a8f619772aac7d61fba3 100644 (file)
@@ -1089,6 +1089,28 @@ dns_db_getservestalettl(dns_db_t *db, dns_ttl_t *ttl) {
        return (ISC_R_NOTIMPLEMENTED);
 }
 
+isc_result_t
+dns_db_setservestalerefresh(dns_db_t *db, uint32_t interval) {
+       REQUIRE(DNS_DB_VALID(db));
+       REQUIRE((db->attributes & DNS_DBATTR_CACHE) != 0);
+
+       if (db->methods->setservestalerefresh != NULL) {
+               return ((db->methods->setservestalerefresh)(db, interval));
+       }
+       return (ISC_R_NOTIMPLEMENTED);
+}
+
+isc_result_t
+dns_db_getservestalerefresh(dns_db_t *db, uint32_t *interval) {
+       REQUIRE(DNS_DB_VALID(db));
+       REQUIRE((db->attributes & DNS_DBATTR_CACHE) != 0);
+
+       if (db->methods->getservestalerefresh != NULL) {
+               return ((db->methods->getservestalerefresh)(db, interval));
+       }
+       return (ISC_R_NOTIMPLEMENTED);
+}
+
 isc_result_t
 dns_db_setgluecachestats(dns_db_t *db, isc_stats_t *stats) {
        REQUIRE(dns_db_iszone(db));
index 2d261c5c6ac8c9b15f3fd66e5696bfe338cf442b..0f2ffb5f353da90caf070543ae32d451c9c7001c 100644 (file)
@@ -967,6 +967,8 @@ static dns_dbmethods_t rpsdb_db_methods = {
        NULL, /* getsize */
        NULL, /* setservestalettl */
        NULL, /* getservestalettl */
+       NULL, /* setservestalerefresh */
+       NULL, /* getservestalerefresh */
        NULL, /* setgluecachestats */
        NULL  /* adjusthashsize */
 };
index ce39765ebce0e7f4e472427b07b8f67d3c5c6507..0474d78dbdce66b7d71b23f7cb57cf12ea367906 100644 (file)
@@ -255,6 +255,18 @@ dns_cache_getservestalettl(dns_cache_t *cache);
  *\li  'cache' to be valid.
  */
 
+void
+dns_cache_setservestalerefresh(dns_cache_t *cache, uint32_t interval);
+/*%<
+ * Sets the length of time to wait before attempting to refresh a rrset
+ * if a previous attempt in doing so has failed.
+ * During this time window if stale rrset are available in cache they
+ * will be directly returned to client.
+ *
+ * Requires:
+ *\li  'cache' to be valid.
+ */
+
 isc_result_t
 dns_cache_flush(dns_cache_t *cache);
 /*%<
index 156e6e8329585392ec92cf1390d8ef64f77c77d0..d00a407ec96328225bb1365892f7ec3af42dd8c0 100644 (file)
@@ -178,6 +178,8 @@ typedef struct dns_dbmethods {
                                uint64_t *records, uint64_t *bytes);
        isc_result_t (*setservestalettl)(dns_db_t *db, dns_ttl_t ttl);
        isc_result_t (*getservestalettl)(dns_db_t *db, dns_ttl_t *ttl);
+       isc_result_t (*setservestalerefresh)(dns_db_t *db, uint32_t interval);
+       isc_result_t (*getservestalerefresh)(dns_db_t *db, uint32_t *interval);
        isc_result_t (*setgluecachestats)(dns_db_t *db, isc_stats_t *stats);
        isc_result_t (*adjusthashsize)(dns_db_t *db, size_t size);
 } dns_dbmethods_t;
@@ -238,6 +240,7 @@ struct dns_dbonupdatelistener {
 #define DNS_DBFIND_ADDITIONALOK 0x0100
 #define DNS_DBFIND_NOZONECUT   0x0200
 #define DNS_DBFIND_STALEOK     0x0400
+#define DNS_DBFIND_STALEENABLED 0x0800
 /*@}*/
 
 /*@{*/
@@ -1701,6 +1704,39 @@ dns_db_getservestalettl(dns_db_t *db, dns_ttl_t *ttl);
  * \li #ISC_R_NOTIMPLEMENTED - Not supported by this DB implementation.
  */
 
+isc_result_t
+dns_db_setservestalerefresh(dns_db_t *db, uint32_t interval);
+/*%<
+ * Sets the length of time to wait before attempting to refresh a rrset
+ * if a previous attempt in doing so has failed.
+ * During this time window if stale rrset are available in cache they
+ * will be directly returned to client.
+ *
+ * Requires:
+ * \li 'db' is a valid cache database.
+ * \li 'interval' is number of seconds before attempting to refresh data.
+ *
+ * Returns:
+ * \li #ISC_R_SUCCESS
+ * \li #ISC_R_NOTIMPLEMENTED - Not supported by this DB implementation.
+ */
+
+isc_result_t
+dns_db_getservestalerefresh(dns_db_t *db, uint32_t *interval);
+/*%<
+ * Gets the length of time in which stale answers are directly returned from
+ * cache before attempting to refresh them, in case a previous attempt in
+ * doing so has failed.
+ *
+ * Requires:
+ * \li 'db' is a valid cache database.
+ * \li 'interval' is number of seconds before attempting to refresh data.
+ *
+ * Returns:
+ * \li #ISC_R_SUCCESS
+ * \li #ISC_R_NOTIMPLEMENTED - Not supported by this DB implementation.
+ */
+
 isc_result_t
 dns_db_setgluecachestats(dns_db_t *db, isc_stats_t *stats);
 /*%<
index 0f2e3773b4b735ba631cf984ba21e03982d08272..ab364aa9deb8603d2eb0ad20b2919f79c5bd4215 100644 (file)
@@ -206,6 +206,7 @@ typedef struct rdatasetheader {
        rbtdb_rdatatype_t type;
        atomic_uint_least16_t attributes;
        dns_trust_t trust;
+       isc_stdtime_t last_refresh_fail_ts;
        struct noqname *noqname;
        struct noqname *closest;
        unsigned int is_mmapped : 1;
@@ -489,6 +490,13 @@ struct dns_rbtdb {
         */
        dns_ttl_t serve_stale_ttl;
 
+       /*
+        * The time after a failed lookup, where stale answers from cache
+        * may be used directly in a DNS response without attempting a
+        * new iterative lookup.
+        */
+       uint32_t serve_stale_refresh;
+
        /*
         * This is a linked list used to implement the LRU cache.  There will
         * be node_lock_count linked lists here.  Nodes in bucket 1 will be
@@ -4548,6 +4556,27 @@ check_stale_header(dns_rbtnode_t *node, rdatasetheader_t *header,
                    stale > search->now) {
                        mark_header_stale(search->rbtdb, header);
                        *header_prev = header;
+                       /*
+                        * If DNS_DBFIND_STALEOK is set then it means we failed
+                        * to resolve the name during recursion, in this case we
+                        * mark the time in which the refresh failed.
+                        */
+                       if ((search->options & DNS_DBFIND_STALEOK) != 0) {
+                               header->last_refresh_fail_ts = search->now;
+                       } else if ((search->options &
+                                   DNS_DBFIND_STALEENABLED) != 0 &&
+                                  search->now <
+                                          (header->last_refresh_fail_ts +
+                                           search->rbtdb->serve_stale_refresh))
+                       {
+                               /*
+                                * If we are within interval between last
+                                * refresh failure time + 'stale-refresh-time',
+                                * then don't skip this stale entry but use it
+                                * instead.
+                                */
+                               return (false);
+                       }
                        return ((search->options & DNS_DBFIND_STALEOK) == 0);
                }
 
@@ -8380,6 +8409,29 @@ getservestalettl(dns_db_t *db, dns_ttl_t *ttl) {
        return (ISC_R_SUCCESS);
 }
 
+static isc_result_t
+setservestalerefresh(dns_db_t *db, uint32_t interval) {
+       dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
+
+       REQUIRE(VALID_RBTDB(rbtdb));
+       REQUIRE(IS_CACHE(rbtdb));
+
+       /* currently no bounds checking.  0 means disable. */
+       rbtdb->serve_stale_refresh = interval;
+       return (ISC_R_SUCCESS);
+}
+
+static isc_result_t
+getservestalerefresh(dns_db_t *db, uint32_t *interval) {
+       dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
+
+       REQUIRE(VALID_RBTDB(rbtdb));
+       REQUIRE(IS_CACHE(rbtdb));
+
+       *interval = rbtdb->serve_stale_refresh;
+       return (ISC_R_SUCCESS);
+}
+
 static dns_dbmethods_t zone_methods = { attach,
                                        detach,
                                        beginload,
@@ -8427,6 +8479,8 @@ static dns_dbmethods_t zone_methods = { attach,
                                        getsize,
                                        NULL, /* setservestalettl */
                                        NULL, /* getservestalettl */
+                                       NULL, /* setservestalerefresh */
+                                       NULL, /* getservestalerefresh */
                                        setgluecachestats,
                                        adjusthashsize };
 
@@ -8477,6 +8531,8 @@ static dns_dbmethods_t cache_methods = { attach,
                                         NULL, /* getsize */
                                         setservestalettl,
                                         getservestalettl,
+                                        setservestalerefresh,
+                                        getservestalerefresh,
                                         NULL,
                                         adjusthashsize };
 
index d525963f52a91a20ac1ba03ef0288bf0f7eb4743..d9de42240977603e9e2d7438fe9799df010a2c10 100644 (file)
@@ -1309,6 +1309,8 @@ static dns_dbmethods_t sdb_methods = {
        NULL, /* getsize */
        NULL, /* setservestalettl */
        NULL, /* getservestalettl */
+       NULL, /* setservestalerefresh */
+       NULL, /* getservestalerefresh */
        NULL, /* setgluecachestats */
        NULL  /* adjusthashsize */
 };
index 14b55eb074bcd26da519a184ccd2569185eb14ec..c8a615a0f3d6020e99607dd23c2af0f7fdc85ca0 100644 (file)
@@ -1281,6 +1281,8 @@ static dns_dbmethods_t sdlzdb_methods = {
        NULL, /* getsize */
        NULL, /* setservestalettl */
        NULL, /* getservestalettl */
+       NULL, /* setservestalerefresh */
+       NULL, /* getservestalerefresh */
        NULL, /* setgluecachestats */
        NULL  /* adjusthashsize */
 };
index a8a86e7d3b4e643f012bbe5460cc6ac95d51d2c7..e28759dfe744d35cc10a5f8a7d8b4306f3ef3791 100644 (file)
@@ -84,6 +84,7 @@ dns_cache_flushname
 dns_cache_flushnode
 dns_cache_getcachesize
 dns_cache_getname
+dns_cache_getservestalerefresh
 dns_cache_getservestalettl
 dns_cache_getstats
 dns_cache_load
@@ -95,6 +96,7 @@ dns_cache_renderxml
 @END LIBXML2
 dns_cache_setcachesize
 dns_cache_setfilename
+dns_cache_setservestalerefresh
 dns_cache_setservestalettl
 dns_cache_updatestats
 dns_catz_add_zone
@@ -200,6 +202,7 @@ dns_db_getnsec3parameters
 dns_db_getoriginnode
 dns_db_getrrsetstats
 dns_db_getservestalettl
+dns_db_getservestalerefresh
 dns_db_getsigningtime
 dns_db_getsize
 dns_db_getsoaserial
@@ -225,6 +228,7 @@ dns_db_serialize
 dns_db_setcachestats
 dns_db_setgluecachestats
 dns_db_setservestalettl
+dns_db_setservestalerefresh
 dns_db_setsigningtime
 dns_db_settask
 dns_db_subtractrdataset
index e4fe0f0beec809c106af0fe8b4255a8c73511a7a..0694863f70e55243b301685928c0031518f3293b 100644 (file)
@@ -2043,6 +2043,7 @@ static cfg_clausedef_t view_clauses[] = {
        { "stale-answer-enable", &cfg_type_boolean, 0 },
        { "stale-answer-ttl", &cfg_type_duration, 0 },
        { "stale-cache-enable", &cfg_type_boolean, 0 },
+       { "stale-refresh-time", &cfg_type_duration, 0 },
        { "suppress-initial-notify", &cfg_type_boolean, CFG_CLAUSEFLAG_NYI },
        { "synth-from-dnssec", &cfg_type_boolean, 0 },
        { "topology", &cfg_type_bracketed_aml, CFG_CLAUSEFLAG_ANCIENT },
index 8f2b78129f5fe56eeb6a4016c58308246878a020..8e26ff5762fe9276987a9e193d4f7336274e90fa 100644 (file)
@@ -5516,6 +5516,9 @@ query_lookup(query_ctx_t *qctx) {
        dns_clientinfo_t ci;
        dns_name_t *rpzqname = NULL;
        unsigned int dboptions;
+       dns_ttl_t stale_ttl = 0;
+       dns_ttl_t stale_refresh = 0;
+       bool dbfind_stale = false;
 
        CCTRACE(ISC_LOG_DEBUG(3), "query_lookup");
 
@@ -5574,6 +5577,22 @@ query_lookup(query_ctx_t *qctx) {
                dboptions |= DNS_DBFIND_COVERINGNSEC;
        }
 
+       dns_db_getservestalerefresh(qctx->client->view->cachedb,
+                                   &stale_refresh);
+       dns_db_getservestalettl(qctx->client->view->cachedb, &stale_ttl);
+       if (stale_refresh > 0) {
+               if (qctx->client->view->staleanswersok == dns_stale_answer_yes)
+               {
+                       dboptions |= DNS_DBFIND_STALEENABLED;
+               } else if (qctx->client->view->staleanswersok ==
+                          dns_stale_answer_conf) {
+                       if (qctx->client->view->staleanswersenable &&
+                           stale_ttl > 0) {
+                               dboptions |= DNS_DBFIND_STALEENABLED;
+                       }
+               }
+       }
+
        result = dns_db_findext(qctx->db, rpzqname, qctx->version, qctx->type,
                                dboptions, qctx->client->now, &qctx->node,
                                qctx->fname, &cm, &ci, qctx->rdataset,
@@ -5594,10 +5613,28 @@ query_lookup(query_ctx_t *qctx) {
                dns_cache_updatestats(qctx->view->cache, result);
        }
 
-       if ((qctx->client->query.dboptions & DNS_DBFIND_STALEOK) != 0) {
+       /*
+        * If DNS_DBFIND_STALEOK is set this means we are dealing with a
+        * lookup following a failed lookup and it is okay to serve a stale
+        * answer. This will start a time window in rbtdb, tracking the last
+        * time the RRset lookup failed.
+        *
+        * A stale answer may also be served if this is a normal lookup,
+        * the view has enabled serve-stale (DNS_DBFIND_STALE_ENABLED is set),
+        * and the request is within the stale-refresh-time window. If this
+        * is the case we have to make sure that the lookup found a stale
+        * answer, otherwise "fresh" answers are also treated as stale.
+        */
+       dbfind_stale = ((dboptions & DNS_DBFIND_STALEOK) != 0);
+       if (dbfind_stale != 0 ||
+           (((dboptions & DNS_DBFIND_STALEENABLED) != 0) &&
+            STALE(qctx->rdataset)))
+       {
                char namebuf[DNS_NAME_FORMATSIZE];
                bool success;
 
+               inc_stats(qctx->client, ns_statscounter_trystale);
+
                qctx->client->query.dboptions &= ~DNS_DBFIND_STALEOK;
                if (dns_rdataset_isassociated(qctx->rdataset) &&
                    dns_rdataset_count(qctx->rdataset) > 0 &&
@@ -5611,10 +5648,20 @@ query_lookup(query_ctx_t *qctx) {
 
                dns_name_format(qctx->client->query.qname, namebuf,
                                sizeof(namebuf));
-               isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE,
-                             NS_LOGMODULE_QUERY, ISC_LOG_INFO,
-                             "%s resolver failure, stale answer %s", namebuf,
-                             success ? "used" : "unavailable");
+               if (dbfind_stale) {
+                       isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE,
+                                     NS_LOGMODULE_QUERY, ISC_LOG_INFO,
+                                     "%s resolver failure, stale answer %s",
+                                     namebuf,
+                                     success ? "used" : "unavailable");
+               } else {
+                       isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE,
+                                     NS_LOGMODULE_QUERY, ISC_LOG_INFO,
+                                     "%s query within stale refresh time, "
+                                     "stale answer %s",
+                                     namebuf,
+                                     success ? "used" : "unavailable");
+               }
 
                if (!success) {
                        QUERY_ERROR(qctx, DNS_R_SERVFAIL);
@@ -6826,7 +6873,6 @@ query_usestale(query_ctx_t *qctx) {
 
        if (staleanswersok) {
                qctx->client->query.dboptions |= DNS_DBFIND_STALEOK;
-               inc_stats(qctx->client, ns_statscounter_trystale);
                if (qctx->client->query.fetch != NULL) {
                        dns_resolver_destroyfetch(&qctx->client->query.fetch);
                }