]> git.ipfire.org Git - thirdparty/samba.git/commitdiff
ctdb-tests: Add a test for stalled node triggering election
authorMartin Schwenke <martin@meltin.net>
Sat, 22 Jan 2022 20:08:02 +0000 (07:08 +1100)
committerAmitay Isaacs <amitay@samba.org>
Mon, 14 Feb 2022 02:46:01 +0000 (02:46 +0000)
A stalled node probably continues to hold the cluster lock, so confirm
elections work in this case.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14958

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
Autobuild-User(master): Amitay Isaacs <amitay@samba.org>
Autobuild-Date(master): Mon Feb 14 02:46:01 UTC 2022 on sn-devel-184

ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh [new file with mode: 0755]

diff --git a/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh b/ctdb/tests/INTEGRATION/simple/cluster.030.node_stall_leader_timeout.sh
new file mode 100755 (executable)
index 0000000..7bca58c
--- /dev/null
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Verify that nothing bad occurs if a node stalls and the leader
+# broadcast timeout triggers
+
+. "${TEST_SCRIPTS_DIR}/integration.bash"
+
+set -e
+
+ctdb_test_init
+
+select_test_node
+echo
+
+echo 'Get "leader timeout":'
+conf_tool="${CTDB_SCRIPTS_HELPER_BINDIR}/ctdb-config"
+# shellcheck disable=SC2154
+# $test_node set by select_test_node() above
+try_command_on_node "$test_node" "${conf_tool} get cluster 'leader timeout'"
+# shellcheck disable=SC2154
+# $out set by ctdb_onnode() above
+leader_timeout="$out"
+echo "Leader timeout is ${leader_timeout} seconds"
+echo
+
+# Assume leader timeout is reasonable and doesn't cause node to be
+# disconnected
+stall_time=$((leader_timeout * 2))
+
+generation_get "$test_node"
+
+echo "Get ctdbd PID on node ${test_node}..."
+ctdb_onnode -v "$test_node" "getpid"
+ctdbd_pid="$out"
+echo
+
+echo "Sending SIGSTOP to ctdbd on ${test_node}"
+try_command_on_node "$test_node" "kill -STOP ${ctdbd_pid}"
+
+sleep_for "$stall_time"
+
+echo "Sending SIGCONT to ctdbd on ${test_node}"
+try_command_on_node "$test_node" "kill -CONT ${ctdbd_pid}"
+echo
+
+wait_until_generation_has_changed "$test_node"
+
+cluster_is_healthy