From: Martin Schwenke Date: Sat, 29 Jun 2024 09:24:25 +0000 (+1000) Subject: ctdb-scripts: Make initial statistics output empty X-Git-Tag: tdb-1.4.13~1329 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=18a29ed367278849889a846bb93f49afd0c045a8;p=thirdparty%2Fsamba.git ctdb-scripts: Make initial statistics output empty This makes initial failure to retrieve statistics less likely to result in a statistics change. To help with this, statistics retrieval stderr now goes to the log - only stdout goes to the file. This means that the test code for checking statistics changes needs to be redone to actually run the statistics command and check. As with rpcinfo output, this output needs to behave as deterministically in the test code as it done in the event script. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs --- diff --git a/ctdb/config/events/legacy/60.nfs.script b/ctdb/config/events/legacy/60.nfs.script index bdea3d63fa1..61e25a5ea0e 100755 --- a/ctdb/config/events/legacy/60.nfs.script +++ b/ctdb/config/events/legacy/60.nfs.script @@ -164,20 +164,22 @@ nfs_check_service() if [ -f "$_curr" ]; then mv -f "$_curr" "$_prev" + else + # Make initial stats empty, so a + # failed attempt to retrieve them on + # service stall is less likely to + # result in a false stats change + : >"$_prev" fi - eval "$service_stats_cmd" >"$_curr" 2>&1 + eval "$service_stats_cmd" >"$_curr" # Only consider statistics on timeout. This # is done below by checking if this string is # contained in $_err. _t="rpcinfo: RPC: Timed out" - if ! $_ok && [ "${_err#*"${_t}"}" != "$_err" ] && ! cmp "$_prev" "$_curr" >/dev/null 2>&1; then - # Stats always implicitly change on - # the first monitor event, since - # previous stats don't exists... echo "WARNING: statistics changed but ${_err}" _ok=true fi diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.115.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.115.sh index b51e20d0740..7afb906049d 100755 --- a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.115.sh +++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.115.sh @@ -18,7 +18,4 @@ service_debug_cmd="program_stack_traces nfsd 5" service_stats_cmd="date --rfc-3339=ns | grep ." EOF -# Test flag to indicate that stats are expected to change -nfs_stats_set_changed "nfs" "status" - nfs_iterate_test 10 "nfs" diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.116.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.116.sh index 6865c9eca98..a2025e93680 100755 --- a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.116.sh +++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.116.sh @@ -18,7 +18,4 @@ service_debug_cmd="program_stack_traces nfsd 5" service_stats_cmd="echo 'hello world' | grep ." EOF -# Test flag to indicate that stats are expected to change -nfs_stats_set_changed "status" - nfs_iterate_test 10 "nfs" diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.117.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.117.sh index 9d96bf1891a..0bac550c52f 100755 --- a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.117.sh +++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.117.sh @@ -18,7 +18,4 @@ service_debug_cmd="program_stack_traces nfsd 5" service_stats_cmd="date --rfc-3339=ns | grep ." EOF -# Test flag to indicate that stats are expected to change -nfs_stats_set_changed "nfs" "status" - nfs_iterate_test 10 "nfs:TIMEOUT" diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.118.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.118.sh index 76013f10eae..ef94d7fba9b 100755 --- a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.118.sh +++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.118.sh @@ -18,7 +18,4 @@ service_debug_cmd="program_stack_traces nfsd 5" service_stats_cmd="echo 'hello world' | grep ." EOF -# Test flag to indicate that stats are expected to change -nfs_stats_set_changed "status" - nfs_iterate_test 10 "nfs:TIMEOUT" diff --git a/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.119.sh b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.119.sh new file mode 100755 index 00000000000..c291f69b82e --- /dev/null +++ b/ctdb/tests/UNIT/eventscripts/60.nfs.monitor.119.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +. "${TEST_SCRIPTS_DIR}/unit.sh" + +define_test "NFS RPC service timeout, silent stats error, 10 iterations" + +# It would be nice to have a non-silent stats error... but that's a +# bit hard for the current test code to handle. :-( + +setup + +cat >"${CTDB_BASE}/nfs-checks.d/20.nfs.check" <"$_prev" + if [ -e "$_curr" ]; then + mv "$_curr" "$_prev" fi - return 1 + eval "$_cmd" >"$_curr" + + ! diff "$_prev" "$_curr" >/dev/null } rpcinfo_timed_out() @@ -344,6 +342,7 @@ rpc_set_service_failure_response() # Unused, but for completeness, possible future use service_check_cmd="" service_debug_cmd="" + service_stats_cmd="" # Don't bother syntax checking, eventscript does that... . "$_file" @@ -360,6 +359,17 @@ rpc_set_service_failure_response() esac fi + # It doesn't matter here if the statistics have + # changed. However, this generates the current + # statistics, which needs to happen, regardless of + # service health, so they can be compared when they + # matter... + _stats_changed=false + if nfs_stats_check_changed \ + "$_rpc_service" "$service_stats_cmd"; then + _stats_changed=true + fi + _why="" _ri_out=$(rpcinfo -T tcp localhost "$_rpc_service" 2>&1) # Check exit code separately for readability @@ -370,9 +380,7 @@ rpc_set_service_failure_response() elif rpcinfo_timed_out "$_ri_out"; then _why="Timed out" - if nfs_stats_check_changed \ - "$_rpc_service" "$_iteration"; then - + if $_stats_changed; then rpc_failure \ "WARNING: statistics changed but" \ "$_rpc_service" \