From: Matthijs Mekking Date: Wed, 10 Mar 2021 14:14:56 +0000 (+0100) Subject: Fix servestale fetchlimits crash X-Git-Tag: v9.16.13~2^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=96953fc293bdd5db8a0e90578f5942f1e6cb3607;p=thirdparty%2Fbind9.git Fix servestale fetchlimits crash When we query the resolver for a domain name that is in the same zone for which is already one or more fetches outstanding, we could potentially hit the fetch limits. If so, recursion fails immediately for the incoming query and if serve-stale is enabled, we may try to return a stale answer. If the resolver is also is authoritative for the parent zone (for example the root zone), first a delegation is found, but we first check the cache for a better response. Nothing is found in the cache, so we try to recurse to find the answer to the query. Because of fetch-limits 'dns_resolver_createfetch()' returns an error, which 'ns_query_recurse()' propagates to the caller, 'query_delegation_recurse()'. Because serve-stale is enabled, 'query_usestale()' is called, setting 'qctx->db' to the cache db, but leaving 'qctx->version' untouched. Now 'query_lookup()' is called to search for stale data in the cache database with a non-NULL 'qctx->version' (which is set to a zone db version), and thus we hit an assertion in rbtdb. This crash was introduced in 'v9_16' by commit 2afaff75edba5e06060ab1dcae5eb824c725aec9. (cherry picked from commit 87591de6f73590c743a053a21052f98e6762012c) --- diff --git a/CHANGES b/CHANGES index 32347f4a4c6..13c9bdaaa43 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,9 @@ +5597. [bug] When serve-stale was enabled and starting the recursive + resolution process for a query failed, a named instance + could crash if it was configured as both a recursive and + authoritative server. This problem was introduced by + change 5573 and has now been fixed. [GL #2565] + 5595. [cleanup] Public header files for BIND 9 libraries no longer directly include third-party library headers. This prevents the need to include paths to third-party header diff --git a/bin/tests/system/serve-stale/ns3/named7.conf.in b/bin/tests/system/serve-stale/ns3/named7.conf.in new file mode 100644 index 00000000000..403a6fa9e25 --- /dev/null +++ b/bin/tests/system/serve-stale/ns3/named7.conf.in @@ -0,0 +1,55 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + + +/* + * Test serve-stale interaction with fetch-limits (dual-mode). + */ + +key rndc_key { + secret "1234abcd8765"; + algorithm hmac-sha256; +}; + +controls { + inet 10.53.0.3 port @CONTROLPORT@ allow { any; } keys { rndc_key; }; +}; + +options { + query-source address 10.53.0.3; + notify-source 10.53.0.3; + transfer-source 10.53.0.3; + port @PORT@; + pid-file "named.pid"; + listen-on { 10.53.0.3; }; + listen-on-v6 { none; }; + dnssec-validation no; + recursion yes; + /* + * stale-answer-enable is not strictly required because serving + * stale answers is enabled in the test via rndc. + */ + stale-answer-enable yes; + stale-cache-enable yes; + stale-answer-ttl 3; + stale-answer-client-timeout disabled; + stale-refresh-time 4; + resolver-query-timeout 10; + fetches-per-zone 1 fail; + fetches-per-server 1 fail; + max-stale-ttl 3600; +}; + +zone "." { + type secondary; + primaries { 10.53.0.1; }; + file "root.bk"; +}; diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index 28c35646280..419d7ce1559 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -2033,9 +2033,9 @@ if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) #################################################################### -# Test if fetch-limits quota is reached, stale data is served. # -#################################################################### -echo_i "test stale data with fetch-limits" +# Test serve-stale's interaction with fetch limits (cache only) # +################################################################# +echo_i "test serve-stale's interaction with fetch-limits (cache only)" # We update the named configuration to enable fetch-limits. The fetch-limits # are set to 1, which is ridiciously low, but that is because for this test we @@ -2141,5 +2141,76 @@ grep "data\.example\..*3.*IN.*TXT.*A text record with a 2 second ttl" dig.out.te if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) +######################################################################## +# Test serve-stale's interaction with fetch limits (dual-mode) # +######################################################################## +echo_i "test serve-stale's interaction with fetch limits (dual-mode)" + +# Update named configuration so that ns3 becomes a recursive resolver which is +# also a secondary server for the root zone. +n=$((n+1)) +echo_i "updating ns3/named.conf ($n)" +ret=0 +copy_setports ns3/named7.conf.in ns3/named.conf +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "running 'rndc reload' ($n)" +ret=0 +rndc_reload ns3 10.53.0.3 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Flush the cache to ensure the example/NS RRset cached during previous tests +# does not override the authoritative delegation found in the root zone. +n=$((n+1)) +echo_i "flush cache ($n)" +ret=0 +$RNDCCMD 10.53.0.3 flush > rndc.out.test$n 2>&1 || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Query name server with low fetch limits. The authoritative server (ans2) is +# not responding. Sending queries for multiple names in the 'example' zone +# in parallel causes the fetch limit for that zone (set to 1) to be +# reached. This should not trigger a crash. +echo_i "sending queries for tests $((n+1))-$((n+4))..." +$DIG -p ${PORT} @10.53.0.3 data.example TXT > dig.out.test$((n+1)) & +$DIG -p ${PORT} @10.53.0.3 othertype.example CAA > dig.out.test$((n+2)) & +$DIG -p ${PORT} @10.53.0.3 nodata.example TXT > dig.out.test$((n+3)) & +$DIG -p ${PORT} @10.53.0.3 nxdomain.example TXT > dig.out.test$((n+4)) + +wait + +# Expect SERVFAIL for the entries not in cache. +n=$((n+1)) +echo_i "check stale data.example (fetch-limits dual-mode) ($n)" +ret=0 +grep "status: SERVFAIL" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "check stale othertype.example (fetch-limits dual-mode) ($n)" +ret=0 +grep "status: SERVFAIL" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "check stale nodata.example (fetch-limits dual-mode) ($n)" +ret=0 +grep "status: SERVFAIL" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "check stale nxdomain.example (fetch-limits dual-mode) ($n)" +ret=0 +grep "status: SERVFAIL" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + echo_i "exit status: $status" [ $status -eq 0 ] || exit 1 diff --git a/lib/ns/query.c b/lib/ns/query.c index 6bb0bb8523e..3af20bc4c9d 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -7168,6 +7168,7 @@ query_usestale(query_ctx_t *qctx, isc_result_t result) { if (dns_view_staleanswerenabled(qctx->client->view)) { dns_db_attach(qctx->client->view->cachedb, &qctx->db); + qctx->version = NULL; qctx->client->query.dboptions |= DNS_DBFIND_STALEOK; if (qctx->client->query.fetch != NULL) { dns_resolver_destroyfetch(&qctx->client->query.fetch);