]> git.ipfire.org Git - thirdparty/samba.git/commitdiff
ctdb-tests: Add some tests to check recovery from recovery lock issues
author Martin Schwenke <martin@meltin.net>
Fri, 10 Jan 2020 04:45:48 +0000 (15:45 +1100)
committer Amitay Isaacs <amitay@samba.org>
Tue, 21 Jan 2020 11:39:40 +0000 (11:39 +0000)
Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh [new file with mode: 0755]
ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh [new file with mode: 0755]

diff --git a/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh b/ctdb/tests/INTEGRATION/simple/cluster.015.reclock_remove_lock.sh
new file mode 100755 (executable)
index 0000000..d74bcf8
--- /dev/null
@@ -0,0 +1,90 @@
+#!/bin/bash
+
+# Verify that the cluster recovers if the recovery lock is removed.
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_skip_on_cluster
+
+ctdb_test_init -r 5
+
+generation_has_changed ()
+{
+       local node="$1"
+       local generation_init="$2"
+
+       # Leak this so it can be printed by test
+       generation_new=""
+
+       ctdb_onnode "$node" status
+       # shellcheck disable=SC2154
+       # $outfile set by ctdb_onnode() above
+       generation_new=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile")
+
+       [ "$generation_new" != "$generation_init" ]
+}
+
+select_test_node
+
+echo "Get recovery lock setting"
+# shellcheck disable=SC2154
+# $test_node set by select_test_node() above
+ctdb_onnode "$test_node" getreclock
+# shellcheck disable=SC2154
+# $out set by ctdb_onnode() above
+reclock_setting="$out"
+
+if [ -z "$reclock_setting" ] ; then
+       ctdb_test_skip "Recovery lock is not set"
+fi
+
+# Strip a trailing " 5" from the setting, then keep only the last
+# space-separated word, which is taken to be the lock file path
+t="${reclock_setting% 5}"
+reclock="${t##* }"
+
+if [ ! -f "$reclock" ] ; then
+       ctdb_test_error "Recovery lock file \"${reclock}\" is missing"
+fi
+
+echo "Recovery lock setting is \"${reclock_setting}\""
+echo "Recovery lock file is \"${reclock}\""
+echo
+
+# Remember the recovery master so it can be compared after the lock
+# file has been removed
+echo "Get current recovery master"
+ctdb_onnode "$test_node" recmaster
+recmaster="$out"
+echo "Recovery master is node ${recmaster}"
+echo
+
+echo "Get initial generation"
+ctdb_onnode "$test_node" status
+# shellcheck disable=SC2154
+# $outfile set by ctdb_onnode() above
+generation_init=$(sed -n -e 's/^Generation:\([0-9]*\)/\1/p' "$outfile")
+echo "Initial generation is ${generation_init}"
+echo
+
+# Delete the lock file itself; the recovery lock setting is untouched
+echo "Remove recovery lock"
+rm "$reclock"
+echo
+
+# This will mean an election has taken place and a recovery has occurred
+echo "Wait until generation changes"
+wait_until 30 generation_has_changed "$test_node" "$generation_init"
+echo
+echo "Generation changed to ${generation_new}"
+echo
+
+echo "Get current recovery master"
+ctdb_onnode "$test_node" recmaster
+recmaster_new="$out"
+
+# The test only passes if the same node is still recovery master
+if [ "$recmaster" != "$recmaster_new" ] ; then
+       ctdb_test_fail \
+               "BAD: Recovery master has changed to node ${recmaster_new}"
+fi
+echo "GOOD: Recovery master is still node ${recmaster_new}"
+echo
+
+cluster_is_healthy
diff --git a/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh b/ctdb/tests/INTEGRATION/simple/cluster.016.reclock_move_lock_dir.sh
new file mode 100755 (executable)
index 0000000..cd193f0
--- /dev/null
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+# Verify that if the directory containing the recovery lock is moved
+# then all nodes are banned (because they can't take the lock).
+# Confirm that if the directory is moved back and the bans time out
+# then the cluster returns to good health.
+
+# This simulates the cluster filesystem containing the recovery lock
+# being unmounted and remounted.
+
+# Source the integration test library from the test scripts directory
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+# Abort on the first unchecked command failure
+set -e
+
+# NOTE(review): presumably skips this test when run against a real
+# cluster rather than local daemons - confirm in integration.bash
+ctdb_test_skip_on_cluster
+
+# NOTE(review): "-r 5" appears to correspond to the trailing " 5" that
+# is stripped from the recovery lock setting later in this script -
+# confirm against ctdb_test_init
+ctdb_test_init -r 5
+
+# Check whether the "nodestatus" command, run on a node via
+# ctdb_onnode, reports that all nodes are banned.
+#
+# Arguments: $1 - node on which to run "nodestatus"
+# Returns:   0 if the command finished with status 1 and its output is
+#            exactly "Warning: All nodes are banned.", non-zero
+#            otherwise
+all_nodes_are_banned ()
+{
+       local node="$1"
+       local status=0
+
+       # Capture and test the status explicitly.  A bare "[ $? -eq 1 ]"
+       # in the middle of a function only has an effect if "set -e"
+       # aborts on it, which does not happen when the function is run
+       # as a condition (presumably how wait_until calls it - TODO
+       # confirm), so a wrong exit status would be silently ignored.
+       ctdb_onnode "$node" nodestatus || status=$?
+       [ "$status" -eq 1 ] || return 1
+
+       # shellcheck disable=SC2154
+       # $out set by ctdb_onnode() above
+       [ "$out" = "Warning: All nodes are banned." ]
+}
+
+select_test_node
+
+echo "Get recovery lock setting"
+# shellcheck disable=SC2154
+# $test_node set by select_test_node() above
+ctdb_onnode "$test_node" getreclock
+# shellcheck disable=SC2154
+# $out set by ctdb_onnode() above
+reclock_setting="$out"
+
+if [ -z "$reclock_setting" ] ; then
+       ctdb_test_skip "Recovery lock is not set"
+fi
+
+# Strip a trailing " 5" from the setting, then keep only the last
+# space-separated word, which is taken to be the lock file path
+t="${reclock_setting% 5}"
+reclock="${t##* }"
+
+if [ ! -f "$reclock" ] ; then
+       ctdb_test_error "Recovery lock file \"${reclock}\" is missing"
+fi
+
+echo "Recovery lock setting is \"${reclock_setting}\""
+echo "Recovery lock file is \"${reclock}\""
+echo
+
+echo "Set ban period to 30s"
+ctdb_onnode all setvar RecoveryBanPeriod 30
+echo
+
+# Directory that contains the recovery lock file
+dir=$(dirname "$reclock")
+
+# The shell's PID ($$) is used as the suffix of the temporary name.
+# NOTE(review): if the wait below fails, "set -e" ends the script and
+# the directory stays renamed - confirm test cleanup copes with this
+echo "Rename recovery lock directory"
+mv "$dir" "${dir}.$$"
+echo
+
+echo "Wait until all nodes are banned"
+wait_until 60 all_nodes_are_banned "$test_node"
+echo
+
+# Put the directory back under its original name
+echo "Restore recovery lock directory"
+mv "${dir}.$$" "$dir"
+echo
+
+wait_until_ready 60