From: Evan Hunt Date: Wed, 6 Nov 2019 00:14:06 +0000 (-0800) Subject: adjust system tests to deal with possible timing issues X-Git-Tag: v9.15.6~21^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=24510a1fda71d1c3c47eb2e69a9694ecf1fa2ef3;p=thirdparty%2Fbind9.git adjust system tests to deal with possible timing issues With the netmgr in use, named may start answering queries before zones are loaded. This can cause transient failures in system tests after servers are restarted or reconfigured. This commit adds retry loops and sleep statements where needed to address this problem. Also incidentally silenced a clang warning. --- diff --git a/bin/tests/system/addzone/tests.sh b/bin/tests/system/addzone/tests.sh index 4e5301f6aea..a4c1ca425db 100755 --- a/bin/tests/system/addzone/tests.sh +++ b/bin/tests/system/addzone/tests.sh @@ -696,11 +696,17 @@ $RNDCCMD 10.53.0.3 addzone "test4.baz" '{ type master; file "e.db"; };' > /dev/n $RNDCCMD 10.53.0.3 addzone "test5.baz" '{ type master; file "e.db"; };' > /dev/null 2>&1 || ret=1 $PERL $SYSTEMTESTTOP/stop.pl addzone ns3 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} addzone ns3 || ret=1 -$DIG $DIGOPTS @10.53.0.3 version.bind txt ch > dig.out.test$n || ret=1 -grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 -n=`expr $n + 1` +for try in 0 1 2 3 4 5 6 7 8 9; do + iret=0 + $DIG $DIGOPTS @10.53.0.3 version.bind txt ch > dig.out.test$n || iret=1 + grep "status: NOERROR" dig.out.test$n > /dev/null || iret=1 + [ "$iret" -eq 0 ] && break + sleep 1 +done +[ "$iret" -ne 0 ] && ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` +n=`expr $n + 1` echo_i "exit status: $status" [ $status -eq 0 ] || exit 1 diff --git a/bin/tests/system/dlzexternal/driver.c b/bin/tests/system/dlzexternal/driver.c index b969b9e053c..4774118a0b6 100644 --- a/bin/tests/system/dlzexternal/driver.c +++ b/bin/tests/system/dlzexternal/driver.c @@ -124,13 +124,13 @@ add_name(struct 
dlz_example_data *state, struct record *list, strlen(data) >= sizeof(list[i].data)) return (ISC_R_NOSPACE); - strncpy(list[i].name, name, sizeof(list[i].name)); + strncpy(list[i].name, name, sizeof(list[i].name) - 1); list[i].name[sizeof(list[i].name) - 1] = '\0'; - strncpy(list[i].type, type, sizeof(list[i].type)); + strncpy(list[i].type, type, sizeof(list[i].type) - 1); list[i].type[sizeof(list[i].type) - 1] = '\0'; - strncpy(list[i].data, data, sizeof(list[i].data)); + strncpy(list[i].data, data, sizeof(list[i].data) - 1); list[i].data[sizeof(list[i].data) - 1] = '\0'; list[i].ttl = ttl; diff --git a/bin/tests/system/ecdsa/tests.sh b/bin/tests/system/ecdsa/tests.sh index c4ceefc3460..7cddfd6ce53 100644 --- a/bin/tests/system/ecdsa/tests.sh +++ b/bin/tests/system/ecdsa/tests.sh @@ -20,7 +20,6 @@ rm -f dig.out.* DIGOPTS="+tcp +noau +noadd +nosea +nostat +nocmd +dnssec -p 5300" # Check the example. domain - echo "I:checking that positive validation works ($n)" ret=0 $DIG $DIGOPTS . @10.53.0.1 soa > dig.out.ns1.test$n || ret=1 diff --git a/bin/tests/system/forward/tests.sh b/bin/tests/system/forward/tests.sh index 36fd8a00409..1c3096cb79b 100644 --- a/bin/tests/system/forward/tests.sh +++ b/bin/tests/system/forward/tests.sh @@ -98,10 +98,15 @@ status=`expr $status + $ret` echo_i "checking that forward only zone overrides empty zone" ret=0 -$DIG $DIGOPTS 1.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2 -grep "status: NOERROR" dig.out.f2 > /dev/null || ret=1 -$DIG $DIGOPTS 2.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2 -grep "status: NXDOMAIN" dig.out.f2 > /dev/null || ret=1 +# retry loop in case the server restart above causes transient failure +for try in 0 1 2 3 4 5 6 7 8 9; do + $DIG $DIGOPTS 1.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2 + grep "status: NOERROR" dig.out.f2 > /dev/null || ret=1 + $DIG $DIGOPTS 2.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2 + grep "status: NXDOMAIN" dig.out.f2 > /dev/null || ret=1 + [ "$ret" -eq 0 ] && break + sleep 1 +done if [ 
$ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` diff --git a/bin/tests/system/legacy/clean.sh b/bin/tests/system/legacy/clean.sh index ad7ef8540df..f8831857465 100644 --- a/bin/tests/system/legacy/clean.sh +++ b/bin/tests/system/legacy/clean.sh @@ -14,6 +14,7 @@ rm -f ns*/named.run rm -f ns*/named.lock # build.sh +rm -f ns1/named_dump.db* rm -f ns6/K* rm -f ns6/dsset-* rm -f ns6/edns512.db diff --git a/bin/tests/system/legacy/tests.sh b/bin/tests/system/legacy/tests.sh index ed784d9615d..8cbbeef53b9 100755 --- a/bin/tests/system/legacy/tests.sh +++ b/bin/tests/system/legacy/tests.sh @@ -259,8 +259,13 @@ $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} legacy ns1 n=`expr $n + 1` echo_i "checking recursive lookup to edns 512 + no tcp + trust anchor fails ($n)" -ret=0 -resolution_fails edns512-notcp. || ret=1 +# retry loop in case the server restart above causes transient failure +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + resolution_fails edns512-notcp. || ret=1 + [ "$ret" -eq 0 ] && break + sleep 1 +done if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` diff --git a/bin/tests/system/nsupdate/tests.sh b/bin/tests/system/nsupdate/tests.sh index b73d1785bbc..83922eda73b 100755 --- a/bin/tests/system/nsupdate/tests.sh +++ b/bin/tests/system/nsupdate/tests.sh @@ -506,7 +506,6 @@ grep "add nsec3param.test. 0 IN TYPE65534 .# 6 000140000400" jp.out.ns3.$n > /de if [ $ret != 0 ] ; then echo_i "failed"; status=`expr $ret + $status`; fi - ret=0 echo_i "testing that rndc stop updates the master file" $NSUPDATE -k ns1/ddns.key <<END > /dev/null || ret=1 @@ -514,16 +513,24 @@ server 10.53.0.1 ${PORT} update add updated4.example.nil. 600 A 10.10.10.3 send END +sleep 3 $PERL $SYSTEMTESTTOP/stop.pl --use-rndc --port ${CONTROLPORT} nsupdate ns1 +sleep 3 # Removing the journal file and restarting the server means # that the data served by the new server process are exactly # those dumped to the master file by "rndc stop".
rm -f ns1/*jnl $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} nsupdate ns1 -$DIG $DIGOPTS +tcp +noadd +nosea +nostat +noquest +nocomm +nocmd updated4.example.nil.\ - @10.53.0.1 a > dig.out.ns1 || status=1 -digcomp knowngood.ns1.afterstop dig.out.ns1 || ret=1 -[ $ret = 0 ] || { echo_i "failed"; status=1; } +for try in 0 1 2 3 4 5 6 7 8 9; do + iret=0 + $DIG $DIGOPTS +tcp +noadd +nosea +nostat +noquest +nocomm +nocmd \ + updated4.example.nil. @10.53.0.1 a > dig.out.ns1 || iret=1 + digcomp knowngood.ns1.afterstop dig.out.ns1 || iret=1 + [ "$iret" -eq 0 ] && break + sleep 1 +done +[ "$iret" -ne 0 ] && ret=1 +[ "$ret" -eq 0 ] || { echo_i "failed"; status=1; } ret=0 echo_i "check that 'nsupdate -l' with a missing keyfile reports the missing file" diff --git a/bin/tests/system/nzd2nzf/tests.sh b/bin/tests/system/nzd2nzf/tests.sh index ea013af48d8..34ede6e4e74 100644 --- a/bin/tests/system/nzd2nzf/tests.sh +++ b/bin/tests/system/nzd2nzf/tests.sh @@ -61,9 +61,14 @@ $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} nzd2nzf ns1 n=`expr $n + 1` echo_i "querying for zone data from migrated zone config ($n)" -ret=0 -$DIG $DIGOPTS @10.53.0.1 a.added.example a > dig.out.ns1.$n || ret=1 -grep 'status: NOERROR' dig.out.ns1.$n > /dev/null || ret=1 +# retry loop in case the server restart above causes transient failures +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.1 a.added.example a > dig.out.ns1.$n || ret=1 + grep 'status: NOERROR' dig.out.ns1.$n > /dev/null || ret=1 + [ "$ret" -eq 0 ] && break + sleep 1 +done n=`expr $n + 1` if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` diff --git a/bin/tests/system/resolver/ns7/named2.conf.in b/bin/tests/system/resolver/ns7/named2.conf.in index b966e783b7d..787705984d1 100644 --- a/bin/tests/system/resolver/ns7/named2.conf.in +++ b/bin/tests/system/resolver/ns7/named2.conf.in @@ -12,7 +12,7 @@ // NS7 options { - query-source address 10.53.0.7 port @PORT@ dscp 13; + 
query-source address 10.53.0.7 dscp 13; notify-source 10.53.0.7 dscp 14; transfer-source 10.53.0.7 dscp 15; port @PORT@; diff --git a/bin/tests/system/rpz/tests.sh b/bin/tests/system/rpz/tests.sh index 255779f785a..88f74d0576b 100644 --- a/bin/tests/system/rpz/tests.sh +++ b/bin/tests/system/rpz/tests.sh @@ -219,6 +219,7 @@ restart () { $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpz ns$1 load_db dnsrps_loaded + sleep 1 } # $1=server and irrelevant args @@ -465,6 +466,7 @@ for mode in native dnsrps; do else echo_i "running DNSRPS sub-test" $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpz + sleep 3 fi ;; esac diff --git a/bin/tests/system/rpzrecurse/tests.sh b/bin/tests/system/rpzrecurse/tests.sh index 763cc2b5f84..11160cacdfa 100644 --- a/bin/tests/system/rpzrecurse/tests.sh +++ b/bin/tests/system/rpzrecurse/tests.sh @@ -135,6 +135,7 @@ for mode in native dnsrps; do else echo_i "running DNSRPS sub-test" $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpzrecurse + sleep 3 fi ;; esac diff --git a/bin/tests/system/statistics/tests.sh b/bin/tests/system/statistics/tests.sh index ccbca449968..ce82d0d2a8d 100644 --- a/bin/tests/system/statistics/tests.sh +++ b/bin/tests/system/statistics/tests.sh @@ -71,7 +71,7 @@ $RNDCCMD -s 10.53.0.3 stats > /dev/null 2>&1 [ -f ns3/named.stats ] || ret=1 if [ ! "$CYGWIN" ]; then nsock0nstat=`grep "UDP/IPv4 sockets active" ns3/named.stats | awk '{print $1}'` - [ 0 -ne ${nsock0nstat:-0} ] || ret=1 + [ 0 -eq ${nsock0nstat:-0} ] || ret=1 fi if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` @@ -107,7 +107,7 @@ if [ ! 
"$CYGWIN" ]; then ret=0 echo_i "verifying active sockets output in named.stats ($n)" nsock1nstat=`grep "UDP/IPv4 sockets active" ns3/named.stats | awk '{print $1}'` - [ `expr $nsock1nstat - $nsock0nstat` -eq 1 ] || ret=1 + [ `expr ${nsock1nstat:-0} - ${nsock0nstat:-0}` -eq 1 ] || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` n=`expr $n + 1` diff --git a/bin/tests/system/tcp/ans6/ans.py b/bin/tests/system/tcp/ans6/ans.py index 3debf19e207..331ac7fbd17 100644 --- a/bin/tests/system/tcp/ans6/ans.py +++ b/bin/tests/system/tcp/ans6/ans.py @@ -42,7 +42,7 @@ import time # Timeout for establishing all connections requested by a single 'open' command. OPEN_TIMEOUT = 2 - +VERSION_QUERY = b'\x00\x1e\xaf\xb8\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x07version\x04bind\x00\x00\x10\x00\x03' def log(msg): print(datetime.datetime.now().strftime('%d-%b-%Y %H:%M:%S.%f ') + msg) @@ -84,6 +84,7 @@ def open_connections(active_conns, count, host, port): log('%s for socket %s' % (errno.errorcode[err], sock)) errors.append(sock) else: + sock.send(VERSION_QUERY) active_conns.append(sock) if errors: diff --git a/bin/tests/system/tcp/tests.sh b/bin/tests/system/tcp/tests.sh index 3af94320314..faf2e1ba78d 100644 --- a/bin/tests/system/tcp/tests.sh +++ b/bin/tests/system/tcp/tests.sh @@ -163,8 +163,12 @@ check_stats_limit() { assert_int_equal "${TCP_HIGH}" "${TCP_LIMIT}" "TCP high-water value" || return 1 } retry 2 check_stats_limit || ret=1 +close_connections $((TCP_LIMIT + 1)) if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status + ret)) +# wait for connections to close +sleep 5 + echo_i "exit status: $status" [ $status -eq 0 ] || exit 1 diff --git a/bin/tests/system/unknown/tests.sh b/bin/tests/system/unknown/tests.sh index 190b84020d3..eeb8920ffa8 100644 --- a/bin/tests/system/unknown/tests.sh +++ b/bin/tests/system/unknown/tests.sh @@ -122,16 +122,24 @@ do done echo_i "checking large unknown record loading on master" -ret=0 -$DIG $DIGOPTS @10.53.0.1 
+tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } -$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.1 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } + $DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } + [ "$ret" -eq 0 ] && break + sleep 1 +done [ $ret = 0 ] || echo_i "failed" status=`expr $status + $ret` echo_i "checking large unknown record loading on slave" -ret=0 -$DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } -$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } + $DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } + [ "$ret" -eq 0 ] && break + sleep 1 +done [ $ret = 0 ] || echo_i "failed" status=`expr $status + $ret` @@ -139,10 +147,16 @@ echo_i "stop and restart slave" $PERL $SYSTEMTESTTOP/stop.pl unknown ns2 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} unknown ns2 +# server may be answering queries before zones are loaded, +# so retry a few times if this query fails echo_i "checking large unknown record loading on slave" -ret=0 -$DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } -$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } + $DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } + [ "$ret" -eq 0 ] && break + sleep 1 +done [ $ret = 0 ] || echo_i "failed" status=`expr $status + $ret` @@ -157,10 +171,16 @@ echo_i "stop and restart 
inline slave" $PERL $SYSTEMTESTTOP/stop.pl unknown ns3 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} unknown ns3 +# server may be answering queries before zones are loaded, +# so retry a few times if this query fails echo_i "checking large unknown record loading on inline slave" -ret=0 -$DIG $DIGOPTS @10.53.0.3 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } -$DIFF large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.3 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } + $DIFF large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } + [ "$ret" -eq 0 ] && break + sleep 1 +done [ $ret = 0 ] || echo_i "failed" status=`expr $status + $ret` diff --git a/bin/tests/system/upforwd/ns3/named.conf.in b/bin/tests/system/upforwd/ns3/named.conf.in index e440a1f0d29..d037e745e85 100644 --- a/bin/tests/system/upforwd/ns3/named.conf.in +++ b/bin/tests/system/upforwd/ns3/named.conf.in @@ -17,7 +17,7 @@ options { pid-file "named.pid"; listen-on { 10.53.0.3; }; listen-on-v6 { none; }; - recursion yes; + recursion no; notify yes; }; diff --git a/bin/tests/system/upforwd/tests.sh b/bin/tests/system/upforwd/tests.sh index b0694bbd5cb..3b0d7b3998e 100644 --- a/bin/tests/system/upforwd/tests.sh +++ b/bin/tests/system/upforwd/tests.sh @@ -21,8 +21,6 @@ DIGOPTS="+tcp +noadd +nosea +nostat +noquest +nocomm +nocmd -p ${PORT}" status=0 n=1 -sleep 5 - echo_i "waiting for servers to be ready for testing ($n)" for i in 1 2 3 4 5 6 7 8 9 10 do diff --git a/bin/tests/system/xfer/tests.sh b/bin/tests/system/xfer/tests.sh index 11a27cb364f..13fc762c4a4 100755 --- a/bin/tests/system/xfer/tests.sh +++ b/bin/tests/system/xfer/tests.sh @@ -431,11 +431,17 @@ $DIG -p ${PORT} txt mapped @10.53.0.3 > dig.out.1.$n grep "status: NOERROR," dig.out.1.$n > /dev/null || tmp=1 $PERL $SYSTEMTESTTOP/stop.pl xfer ns3 $PERL 
$SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} xfer ns3 -$DIG -p ${PORT} txt mapped @10.53.0.3 > dig.out.2.$n -grep "status: NOERROR," dig.out.2.$n > /dev/null || tmp=1 -$DIG -p ${PORT} axfr mapped @10.53.0.3 > dig.out.3.$n -digcomp knowngood.mapped dig.out.3.$n || tmp=1 -if test $tmp != 0 ; then echo_i "failed"; fi +for try in 0 1 2 3 4 5 6 7 8 9; do + iret=0 + $DIG -p ${PORT} txt mapped @10.53.0.3 > dig.out.2.$n + grep "status: NOERROR," dig.out.2.$n > /dev/null || iret=1 + $DIG -p ${PORT} axfr mapped @10.53.0.3 > dig.out.3.$n + digcomp knowngood.mapped dig.out.3.$n || iret=1 + [ "$iret" -eq 0 ] && break + sleep 1 +done +[ "$iret" -eq 0 ] || tmp=1 +[ "$tmp" -ne 0 ] && echo_i "failed" status=`expr $status + $tmp` n=`expr $n + 1`