fi
eval "$service_stats_cmd" >"$_curr" 2>&1
+ # Only consider statistics on timeout. This
+ # is done below by checking if this string is
+ # contained in $_err.
+ _t="rpcinfo: RPC: Timed out"
+
if ! $_ok &&
+ [ "${_err#*"${_t}"}" != "$_err" ] &&
! cmp "$_prev" "$_curr" >/dev/null 2>&1; then
# Stats always implicitly change on
# the first monitor event, since
--- /dev/null
+#!/bin/sh
+
+# Unit test: the "nfs" RPC service is flagged as TIMEOUT for 10
+# iterations while service_stats_cmd produces different output on
+# every invocation.
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "NFS RPC service timeout, stats change, 10 iterations"
+
+setup
+
+# Write the check definition for "nfs".  The here-document delimiter is
+# unquoted, so \$ is used to get a literal $CTDB_NFS_CALLOUT into the
+# file instead of expanding it here.  The stats command prints the
+# current time with nanosecond precision, so consecutive runs differ.
+cat >"${CTDB_BASE}/nfs-checks.d/20.nfs.check" <<EOF
+# nfs
+version="3"
+restart_every=10
+unhealthy_after=2
+service_stop_cmd="\$CTDB_NFS_CALLOUT stop nfs"
+service_start_cmd="\$CTDB_NFS_CALLOUT start nfs"
+service_debug_cmd="program_stack_traces nfsd 5"
+# Dummy pipeline confirms that pipelines work in this context
+service_stats_cmd="date --rfc-3339=ns | grep ."
+EOF
+
+# Test flag to indicate that stats are expected to change
+nfs_stats_set_changed "nfs" "status"
+
+# 10 iterations; the ":TIMEOUT" suffix is the optional flag on the
+# service argument
+nfs_iterate_test 10 "nfs:TIMEOUT"
--- /dev/null
+#!/bin/sh
+
+# Unit test: the "nfs" RPC service is flagged as TIMEOUT for 10
+# iterations while service_stats_cmd produces the same output on every
+# invocation.
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "NFS RPC service timeout, stats don't change, 10 iterations"
+
+setup
+
+# Write the check definition for "nfs".  The here-document delimiter is
+# unquoted, so \$ is used to get a literal $CTDB_NFS_CALLOUT into the
+# file instead of expanding it here.  The stats command prints a
+# constant string, so consecutive runs are identical.
+cat >"${CTDB_BASE}/nfs-checks.d/20.nfs.check" <<EOF
+# nfs
+version="3"
+restart_every=10
+unhealthy_after=2
+service_stop_cmd="\$CTDB_NFS_CALLOUT stop nfs"
+service_start_cmd="\$CTDB_NFS_CALLOUT start nfs"
+service_debug_cmd="program_stack_traces nfsd 5"
+# Dummy pipeline confirms that pipelines work in this context
+service_stats_cmd="echo 'hello world' | grep ."
+EOF
+
+# Set the stats-changed test flag for "status" only; "nfs" is not
+# listed, in line with the "stats don't change" case in the test title.
+# NOTE(review): presumably the arguments name the services whose stats
+# are expected to change - confirm against nfs_stats_set_changed.
+nfs_stats_set_changed "status"
+
+# 10 iterations; the ":TIMEOUT" suffix is the optional flag on the
+# service argument
+nfs_iterate_test 10 "nfs:TIMEOUT"
FAKE_RPCINFO_SERVICES="$_out"
}
+# Flag fake RPC services as timing out
+#
+# Each argument is an RPC service name.  Every entry of
+# FAKE_RPCINFO_SERVICES whose first ":"-separated field equals one of
+# the arguments gets ":TIMEOUT" appended.  The list is then rebuilt,
+# space-separated, and stored back into FAKE_RPCINFO_SERVICES.
+_rpc_services_timeout()
+{
+	_out=""
+	# Deliberately unquoted: the list is split into one word per entry
+	for _s in $FAKE_RPCINFO_SERVICES; do
+		for _i in "$@"; do
+			case "${_s%%:*}" in
+			"$_i")
+				debug "Marking RPC service \"${_i}\" as TIMEOUT"
+				_s="${_s}:TIMEOUT"
+				;;
+			esac
+		done
+		# Prefix a separating space only once _out is non-empty
+		_out="${_out:+${_out} }${_s}"
+	done
+	FAKE_RPCINFO_SERVICES="$_out"
+}
+
_rpc_services_up()
{
_out="$FAKE_RPCINFO_SERVICES"
return 1
}
+# Succeed if the rpcinfo output passed as $1 contains "Timed out"
+#
+# A shell pattern match is used rather than "echo | grep": it needs no
+# subprocesses and the text is not exposed to echo's shell-dependent
+# treatment of backslashes or a leading "-n".
+#
+# Returns 0 when the string is present, 1 otherwise.
+rpcinfo_timed_out()
+{
+	case "$1" in
+	*"Timed out"*)
+		return 0
+		;;
+	esac
+	return 1
+}
+
guess_output()
{
case "$1" in
_err_or_warn="$1"
_rpc_service="$2"
_ver="$3"
+ _why="${4:-Program not registered}"
cat <<EOF
${_err_or_warn} ${_rpc_service} failed RPC check:
-rpcinfo: RPC: Program not registered
+rpcinfo: RPC: ${_why}
program ${_rpc_service}${_ver:+ version }${_ver} is not available
EOF
}
esac
fi
- if rpcinfo -T tcp localhost "$_rpc_service" \
- >/dev/null 2>&1; then
-
+ _why=""
+ _ri_out=$(rpcinfo -T tcp localhost "$_rpc_service" 2>&1)
+ # Check exit code separately for readability
+ # shellcheck disable=SC2181
+ if [ $? -eq 0 ]; then
echo 0 >"$_failcount_file"
exit # from subshell
- elif nfs_stats_check_changed \
- "$_rpc_service" "$_iteration"; then
+ elif rpcinfo_timed_out "$_ri_out"; then
+ _why="Timed out"
- rpc_failure \
- "WARNING: statistics changed but" \
- "$_rpc_service" \
- "$_ver" \
- >"$_out"
- echo 0 >"$_failcount_file"
- exit # from subshell
+ if nfs_stats_check_changed \
+ "$_rpc_service" "$_iteration"; then
+
+ rpc_failure \
+ "WARNING: statistics changed but" \
+ "$_rpc_service" \
+ "$_ver" \
+ "$_why" \
+ >"$_out"
+ echo 0 >"$_failcount_file"
+ exit # from subshell
+ fi
fi
_numfails=$((_numfails + 1))
"ERROR:" \
"$_rpc_service" \
"$_ver" \
+ "$_why" \
>"$_out"
else
_unhealthy=false
"WARNING:" \
"$_rpc_service" \
"$_ver" \
+ "$_why" \
>"$_out"
fi
#
# - 1st argument is the number of iterations.
#
-# - 2nd argument is the NFS/RPC service being tested
+# - 2nd argument is the NFS/RPC service being tested, with optional
+# TIMEOUT flag
#
# This service is marked down before the 1st iteration.
#
debug <<EOF
--------------------------------------------------
EOF
- _rpc_services_down "$_rpc_service"
+
+ _action="${_rpc_service#*:}"
+ if [ "$_action" != "$_rpc_service" ]; then
+ _rpc_service="${_rpc_service%:*}"
+ else
+ _action=""
+ fi
+
+ if [ -n "$_action" ]; then
+ case "$_action" in
+ TIMEOUT)
+ _rpc_services_timeout "$_rpc_service"
+ ;;
+ esac
+ else
+ _rpc_services_down "$_rpc_service"
+ fi
fi
debug <<EOF
parse_options "$@"
+_fail_msg="rpcinfo: RPC: Program not registered"
+
for i in ${FAKE_RPCINFO_SERVICES}; do
# This is stupidly cumulative, but needs to happen after the
# initial split of the list above.
# Want glob expansion
# shellcheck disable=SC2086
set -- $i
- # $1 = program, $2 = low version, $3 = high version
+ # $1 = program, $2 = low version, $3 = high version, $4 = flag
if [ "$1" = "$p" ]; then
+ case "$4" in
+ TIMEOUT)
+ _fail_msg="rpcinfo: RPC: Timed out"
+ break
+ ;;
+ esac
+
if [ -n "$v" ]; then
if [ "$2" -le "$v" ] && [ "$v" -le "$3" ]; then
echo "program ${p} version ${v} ready and waiting"
fi
done
-echo "rpcinfo: RPC: Program not registered" >&2
+echo "$_fail_msg" >&2
if [ -n "$v" ]; then
echo "program ${p} version ${v} is not available"
else