From: Matthijs Mekking Date: Fri, 26 Mar 2021 13:43:07 +0000 (+0100) Subject: Remove result exception on staleonly lookup X-Git-Tag: v9.17.12~16^2~3 X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=aaed7f9d8c2465790d769221dfe8378c7147f5eb;p=thirdparty%2Fbind9.git Remove result exception on staleonly lookup When implementing "stale-answer-client-timeout", we decided that we should only return positive answers prematurely to clients. A negative response is not useful, and in that case it is better to wait for the recursion to complete. To do so, we check the result and if it is not ISC_R_SUCCESS, we decide that it is not good enough. However, there are more return codes that could lead to a positive answer (e.g. CNAME chains). This commit removes the exception and now uses the same logic that other stale lookups use to determine if we found a useful stale answer (stale_found == true). This means we can simplify two test cases in the serve-stale system test: nodata.example is no longer treated differently than data.example. --- diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index 1d50abcae1e..02535b002d1 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -1636,6 +1636,11 @@ status=$((status+ret)) # Allow RRset to become stale. sleep 2 +echo_i "sending queries for tests $((n+1))-$((n+2))..." +$DIG -p ${PORT} +tries=1 +timeout=10 @10.53.0.3 data.example TXT > dig.out.test$((n+1)) & +$DIG -p ${PORT} +tries=1 +timeout=10 @10.53.0.3 nodata.example TXT > dig.out.test$((n+2)) +wait + # We configured a long value of 30 seconds for resolver-query-timeout. # That should give us enough time to receive an stale answer from cache # after stale-answer-client-timeout timer of 1.8 sec triggers. @@ -1643,7 +1648,6 @@ n=$((n+1)) echo_i "check stale data.example comes from cache (default stale-answer-client-timeout) ($n)" nextpart ns3/named.run > /dev/null t1=`$PERL -e 'print time()'` -$DIG -p ${PORT} +tries=1 +timeout=10 @10.53.0.3 data.example TXT > dig.out.test$n t2=`$PERL -e 'print time()'` wait_for_log 5 "data.example client timeout, stale answer used" ns3/named.run || ret=1 ret=0 @@ -1656,26 +1660,8 @@ grep "data\.example\..*3.*IN.*TXT.*A text record with a 2 second ttl" dig.out.te if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) -echo_i "sending queries for tests $((n+1))-$((n+2))..." -$DIG -p ${PORT} +tries=1 +timeout=3 @10.53.0.3 nodata.example TXT > dig.out.test$((n+1)) & -$DIG -p ${PORT} +tries=1 +timeout=30 @10.53.0.3 nodata.example TXT > dig.out.test$((n+2)) -wait - -# Since nodata.example is cached as NXRRSET and marked as stale at this point, -# BIND must not return this RRset when stale-answer-client-timeout triggers, -# instead, it must attempt to refresh the RRset. Since the authoritative -# server is disabled and we are using resolver-query-timeout value of 10 -# seconds, we expect this query with a timeout of 3 seconds to time out. -n=$((n+1)) -echo_i "check query for nodata.example times out (default stale-answer-client-timeout) ($n)" -grep "connection timed out" dig.out.test$n > /dev/null || ret=1 -if [ $ret != 0 ]; then echo_i "failed"; fi -status=$((status+ret)) - -# For this query we expect BIND to return stale NXRRSET data for -# nodata.example after resolver-query-timeout expires. n=$((n+1)) -echo_i "check stale nodata.example comes from cache after resolver-query-timeout expires (default stale-answer-client-timeout) ($n)" +echo_i "check stale nodata.example comes from cache (default stale-answer-client-timeout) ($n)" grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1 grep "example\..*3.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 @@ -1775,6 +1761,18 @@ status=$((status+ret)) # Allow RRset to become stale. sleep 2 +n=$((n+1)) +ret=0 +echo_i "check stale nodata.example comes from cache (stale-answer-client-timeout 0) ($n)" +nextpart ns3/named.run > /dev/null +$DIG -p ${PORT} @10.53.0.3 nodata.example TXT > dig.out.test$n +wait_for_log 5 "nodata.example stale answer used, an attempt to refresh the RRset" ns3/named.run || ret=1 +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1 +grep "example\..*3.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + n=$((n+1)) ret=0 echo_i "check stale data.example comes from cache (stale-answer-client-timeout 0) ($n)" @@ -1815,39 +1813,18 @@ grep "data\.example\..*[12].*IN.*TXT.*A text record with a 2 second ttl" dig.out if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) -n=$((n+1)) -echo_i "disable responses from authoritative server ($n)" -ret=0 -$DIG -p ${PORT} @10.53.0.2 txt disable > dig.out.test$n -grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 -grep "TXT.\"0\"" dig.out.test$n > /dev/null || ret=1 -if [ $ret != 0 ]; then echo_i "failed"; fi -status=$((status+ret)) - -echo_i "sending queries for tests $((n+1))-$((n+2))..." -$DIG -p ${PORT} +tries=1 +timeout=3 @10.53.0.3 nodata.example TXT > dig.out.test$((n+1)) & -$DIG -p ${PORT} +tries=1 +timeout=30 @10.53.0.3 nodata.example TXT > dig.out.test$((n+2)) -wait - -# Since nodata.example is cached as NXRRSET and marked as stale at this point, -# BIND must not prompty return this RRset due to -# stale-answer-client-timeout == 0, instead, it must attempt to refresh the -# RRset. Since the authoritative server is disabled and we are using -# resolver-query-timeout value of 10 seconds, we expect this query with a -# timeout of 3 seconds to time out. -n=$((n+1)) -echo_i "check query for nodata.example times out (stale-answer-client-timeout 0) ($n)" -grep "connection timed out" dig.out.test$n > /dev/null || ret=1 -if [ $ret != 0 ]; then echo_i "failed"; fi -status=$((status+ret)) +wait_for_nodata_refresh() { + $DIG -p ${PORT} @10.53.0.3 nodata.example TXT > dig.out.test$n + grep "status: NOERROR" dig.out.test$n > /dev/null || return 1 + grep "ANSWER: 0," dig.out.test$n > /dev/null || return 1 + grep "example\..*[12].*IN.*SOA" dig.out.test$n > /dev/null || return 1 + return 0 +} -# For this query we expect BIND to return stale NXRRSET data for -# nodata.example after resolver-query-timeout expires. n=$((n+1)) -echo_i "check stale nodata.example comes from cache after resolver-query-timeout expires (stale-answer-client-timeout 0) ($n)" -grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 -grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1 -grep "example\..*3.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 +ret=0 +echo_i "check stale nodata.example was refreshed (stale-answer-client-timeout 0) ($n)" +retry_quiet 10 wait_for_nodata_refresh || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) diff --git a/lib/dns/include/dns/db.h b/lib/dns/include/dns/db.h index f77cc797d46..c995e70a1d4 100644 --- a/lib/dns/include/dns/db.h +++ b/lib/dns/include/dns/db.h @@ -261,8 +261,8 @@ struct dns_dbonupdatelistener { /* * DNS_DBFIND_STALEONLY: This flag is used when we want stale data from the * database, but not due to a failure in resolution, it also doesn't require - * stale-refresh-time window timer to be active. As long as there is a stale - * RRset available, it should be returned. + * stale-refresh-time window timer to be active. As long as there is stale + * data available, it should be returned. */ #define DNS_DBFIND_STALEONLY 0x1000 diff --git a/lib/ns/query.c b/lib/ns/query.c index 7e23a087a55..dbdb355f74a 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -5849,22 +5849,16 @@ query_lookup(query_ctx_t *qctx) { (dboptions & DNS_DBFIND_STALEENABLED) != 0); /* - * If DNS_DBFIND_STALEONLY is set, a stale positive answer is requested. + * If DNS_DBFIND_STALEONLY is set, a stale answer is requested. * This can happen if 'stale-answer-client-timeout' is enabled. * - * If 'stale-answer-client-timeout' is set to 0, and a stale positive + * If 'stale-answer-client-timeout' is set to 0, and a stale * answer is found, send it to the client, and try to refresh the - * RRset. If a stale negative answer is found, continue with recursion - * (perhaps the query will be resolved eventually and the answer from - * the authoritative is returned to the client, or the query will - * timeout, in that case DNS_DBFIND_STALEOK may be set, and a stale - * negative answer is returned (or SERVFAIL). + * RRset. * - * If 'stale-answer-client-timeout' is non-zero, and a stale positive + * If 'stale-answer-client-timeout' is non-zero, and a stale * answer is found, send it to the client. Don't try to refresh the - * RRset because a fetch is already in progress. If a stale negative - * answer is found, then abort the lookup and the client has to wait - * until recursion is finished. + * RRset because a fetch is already in progress. */ stale_only = ((dboptions & DNS_DBFIND_STALEONLY) != 0); @@ -5922,7 +5916,7 @@ query_lookup(query_ctx_t *qctx) { qctx->rdataset->attributes |= DNS_RDATASETATTR_STALE_ADDED; if ((qctx->options & DNS_GETDB_STALEFIRST) != 0) { - if (!stale_found || result != ISC_R_SUCCESS) { + if (!stale_found) { /* * We have nothing useful in cache to return * immediately. @@ -5940,8 +5934,6 @@ query_lookup(query_ctx_t *qctx) { } return query_lookup(qctx); } else { - INSIST(stale_found); - INSIST(result == ISC_R_SUCCESS); /* * Immediately return the stale answer, start a * resolver fetch to refresh the data in cache. @@ -5966,7 +5958,7 @@ query_lookup(query_ctx_t *qctx) { "%s client timeout, stale answer %s", namebuf, stale_found ? "used" : "unavailable"); - if (!stale_found || result != ISC_R_SUCCESS) { + if (!stale_found) { return (result); } }