]> git.ipfire.org Git - thirdparty/samba.git/commitdiff
ctdb-scripts: Only send notifies for newly taken IPs
authorPeter Schwenke <pschwenke@ddn.com>
Tue, 29 Apr 2025 06:33:45 +0000 (16:33 +1000)
committerMartin Schwenke <martins@samba.org>
Wed, 18 Feb 2026 11:13:40 +0000 (11:13 +0000)
We no longer delete shared state (and send notifies) for
IPs previously held by the current node. The NFS lock manager
won't have released locks for these IPs, so won't generate
SM_MON on reclaim attempts.  Therefore, there will be
no add-client to put them back.

We now record newly taken IP addresses in takeip,
and only send notifies for those during
ipreallocated.  The extra notifies were also confusing
statd.

Update existing tests to always simulate taking all of a node's IPs.
This causes no output changes.

Test updates confirm the subtleties of the statd_callout_helper
behaviour change.  These pretend to only take a single IP, so
SM_NOTIFY must not be sent for other IPs.  Shared state should
remain for these other files.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=15939

Signed-off-by: Peter Schwenke <pschwenke@ddn.com>
Signed-off-by: Martin Schwenke <mschwenke@ddn.com>
Reviewed-by: Anoop C S <anoopcs@samba.org>
ctdb/config/events/legacy/60.nfs.script
ctdb/tests/UNIT/eventscripts/scripts/statd-callout.sh
ctdb/tests/UNIT/eventscripts/statd-callout.004.sh
ctdb/tests/UNIT/eventscripts/statd-callout.005.sh
ctdb/tests/UNIT/eventscripts/statd-callout.006.sh
ctdb/tests/UNIT/eventscripts/statd-callout.008.sh [new file with mode: 0755]
ctdb/tests/UNIT/eventscripts/statd-callout.108.sh [new file with mode: 0755]
ctdb/tests/UNIT/eventscripts/statd-callout.208.sh [new file with mode: 0755]
ctdb/tools/statd_callout_helper

index c59a0c18ea85e7faa4138e71fda6454d9d2925da..0146a39170c197ecd3cb8225bd2129443571ebbc 100755 (executable)
@@ -325,6 +325,9 @@ shutdown)
 
 takeip)
        nfs_callout "$@" || exit $?
+       if [ -x "${CTDB_HELPER_BINDIR}/statd_callout_helper" ] ; then
+               "${CTDB_HELPER_BINDIR}/statd_callout_helper" takeip "$3"
+       fi
        ctdb_service_set_reconfigure
        ;;
 
index 5b03ce3bb302c1365e93cfbf68a9198cfbfb7328..ff8333c16db3e57ba53d3da0cead0e582d1c9060 100644 (file)
@@ -132,8 +132,9 @@ check_statd_callout_smnotify()
 
        nfs_load_config
 
-       ctdb_get_my_public_addresses |
-               while read -r _ _sip _; do
+       find "$state_dir" -name "takeip@${FAKE_CTDB_PNN}@*" |
+               while read -r _f; do
+                       _sip="${_f##*@}"
                        _prefix="mon@${_sip}@"
                        find "$state_dir" -name "${_prefix}*" |
                                while read -r _f; do
@@ -143,7 +144,8 @@ SM_NOTIFY: ${_sip} -> ${_cip}, MON_NAME=${FAKE_NFS_HOSTNAME}, STATE=${_state}
 EOF
                                        rm -f "$_f"
                                done
-               done | {
+               done |
+               sort | {
                ok
                simple_test_event "notify"
        } || exit $?
@@ -202,9 +204,15 @@ EOF
                                rm -f "${state_dir}/mon@${_sip}@${1}"
                        done
                ;;
+       takeip)
+               cmd="${CTDB_SCRIPTS_TOOLS_HELPER_DIR}/statd_callout_helper"
+               script_test "$cmd" "$event" "$@"
+               touch "${state_dir}/takeip@${FAKE_CTDB_PNN}@${1}"
+               ;;
        notify)
                cmd="${CTDB_SCRIPTS_TOOLS_HELPER_DIR}/statd_callout_helper"
                script_test "$cmd" "$event" "$@"
+               rm -f "${state_dir}/takeip@${FAKE_CTDB_PNN}@"*
                ;;
        *)
                cmd="${CTDB_SCRIPTS_TOOLS_HELPER_DIR}/statd_callout_helper"
index adff2934186e4055f5d74fa4f57bc38605276d66..2ea8012944636f7c1e8529375a02d10d7538fe9f 100755 (executable)
@@ -13,6 +13,10 @@ setup "$mode"
 
 ok_null
 simple_test_event "startup"
+ctdb_get_my_public_addresses |
+       while read -r _ sip _; do
+               simple_test_event "takeip" "$sip"
+       done
 simple_test_event "add-client" "192.168.123.45"
 simple_test_event "update"
 
index 2c2fb7ecde60916ace8f057689e020e5346490e2..b3dcfcad3600602ad532d72bdb54634e7507b213 100755 (executable)
@@ -13,6 +13,10 @@ setup "$mode"
 
 ok_null
 simple_test_event "startup"
+ctdb_get_my_public_addresses |
+       while read -r _ sip _; do
+               simple_test_event "takeip" "$sip"
+       done
 simple_test_event "add-client" "192.168.123.45"
 simple_test_event "update"
 
@@ -20,6 +24,10 @@ ctdb_set_pnn 1
 
 ok_null
 simple_test_event "startup"
+ctdb_get_my_public_addresses |
+       while read -r _ sip _; do
+               simple_test_event "takeip" "$sip"
+       done
 simple_test_event "add-client" "192.168.123.46"
 simple_test_event "update"
 
index ac64718b7fc6c35e8d80b4eb2d0668262e556228..35a7f6dcd815fbce1db3171416f752c1b99b7e26 100755 (executable)
@@ -13,6 +13,10 @@ setup "$mode"
 
 ok_null
 simple_test_event "startup"
+ctdb_get_my_public_addresses |
+       while read -r _ sip _; do
+               simple_test_event "takeip" "$sip"
+       done
 simple_test_event "add-client" "192.168.123.45"
 simple_test_event "update"
 
@@ -20,6 +24,10 @@ ctdb_set_pnn 1
 
 ok_null
 simple_test_event "startup"
+ctdb_get_my_public_addresses |
+       while read -r _ sip _; do
+               simple_test_event "takeip" "$sip"
+       done
 simple_test_event "add-client" "192.168.123.46"
 simple_test_event "update"
 
diff --git a/ctdb/tests/UNIT/eventscripts/statd-callout.008.sh b/ctdb/tests/UNIT/eventscripts/statd-callout.008.sh
new file mode 100755 (executable)
index 0000000..73e4c5b
--- /dev/null
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+if [ -z "$CTDB_STATD_CALLOUT_SHARED_STORAGE" ]; then
+       CTDB_STATD_CALLOUT_SHARED_STORAGE="persistent_db"
+fi
+mode="$CTDB_STATD_CALLOUT_SHARED_STORAGE"
+
+define_test "${mode} - add-client on different nodes, take 1 IP, notify on both"
+
+setup "$mode"
+
+ok_null
+simple_test_event "startup"
+ctdb_get_1_public_address |
+       while read -r _ sip _; do
+               simple_test_event "takeip" "$sip"
+       done
+simple_test_event "add-client" "192.168.123.45"
+simple_test_event "update"
+
+ctdb_set_pnn 1
+
+ok_null
+simple_test_event "startup"
+ctdb_get_1_public_address |
+       while read -r _ sip _; do
+               simple_test_event "takeip" "$sip"
+       done
+simple_test_event "add-client" "192.168.123.46"
+simple_test_event "update"
+
+ctdb_set_pnn 0
+
+check_statd_callout_smnotify
+
+ctdb_set_pnn 1
+
+check_statd_callout_smnotify
+
+check_shared_storage_statd_state
diff --git a/ctdb/tests/UNIT/eventscripts/statd-callout.108.sh b/ctdb/tests/UNIT/eventscripts/statd-callout.108.sh
new file mode 100755 (executable)
index 0000000..224b584
--- /dev/null
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+CTDB_STATD_CALLOUT_SHARED_STORAGE="shared_dir"
+
+_dir=$(dirname "$0")
+. "${_dir}/statd-callout.008.sh"
diff --git a/ctdb/tests/UNIT/eventscripts/statd-callout.208.sh b/ctdb/tests/UNIT/eventscripts/statd-callout.208.sh
new file mode 100755 (executable)
index 0000000..6fc8bf1
--- /dev/null
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+CTDB_STATD_CALLOUT_SHARED_STORAGE="none"
+
+_dir=$(dirname "$0")
+. "${_dir}/statd-callout.008.sh"
index 6ab466f74b2b0b51bf6ed4e001d1cf9b2d39bc02..98c1a7c6e4b49a989502e008e007511c274ccac4 100755 (executable)
@@ -80,6 +80,7 @@ create_add_del_client_dir()
 # script_state_dir set by ctdb_setup_state_dir()
 # shellcheck disable=SC2154
 statd_callout_state_dir="${script_state_dir}/statd_callout"
+statd_new_ips_file="${statd_callout_state_dir}/new_ips.txt"
 
 # Set default value, if necessary
 : "${CTDB_STATD_CALLOUT_SHARED_STORAGE:=persistent_db}"
@@ -216,9 +217,10 @@ persistent_db_grep_filter="${statd_callout_state_dir}/.grep_filter"
 
 persistent_db_make_grep_filter()
 {
+       _ips_file="$1"
        while read -r _ip; do
                echo "statd-state@${_ip}@"
-       done <"$CTDB_MY_PUBLIC_IPS_CACHE" >"$persistent_db_grep_filter"
+       done <"$_ips_file" >"$persistent_db_grep_filter"
 }
 
 update_persistent_db()
@@ -231,7 +233,7 @@ update_persistent_db()
                exit 0
        fi
 
-       persistent_db_make_grep_filter
+       persistent_db_make_grep_filter "$CTDB_MY_PUBLIC_IPS_CACHE"
 
        # Use cat instead of direct grep since POSIX grep does not
        # have -h
@@ -249,10 +251,14 @@ update_persistent_db()
 
 list_records_persistent_db()
 {
-       persistent_db_make_grep_filter
+       persistent_db_make_grep_filter "$statd_new_ips_file"
 
+       # Redirect below to /dev/null is because some versions of grep
+       # appear to not drain the input if the file passed to -f is
+       # empty (so it matches nothing).  This can cause the first sed
+       # command in the pipeline to exit with EPIPE.
        $CTDB catdb "$statd_callout_db" |
-               sed -n -e 's|^key([0-9]*) = "\([^"]*\)".*|\1|p' |
+               sed -n -e 's|^key([0-9]*) = "\([^"]*\)".*|\1|p' 2>/dev/null |
                grep -F -f "$persistent_db_grep_filter" |
                sed -e 's|statd-state@\([^@]*\)@\(.*\)|\1 \2|'
 
@@ -308,11 +314,27 @@ update_shared_dir()
        :
 }
 
+save_ip()
+{
+       _ip_addr=$1
+       _f="$statd_new_ips_file"
+       _lock="${_f}.lock"
+       _new="${_f}.new.$$"
+       {
+               flock --timeout 10 9 ||
+                       die "statd_callout_helper save_ip: timeout"
+
+               cat "$_f" 2>/dev/null >"$_new"
+               echo "$_ip_addr" >>"$_new"
+               mv "$_new" "$_f"
+       } 9>"$_lock"
+}
+
 list_records_shared_dir()
 {
        while read -r _ip; do
                ls "${statd_callout_shared_dir}/statd-state@${_ip}@"*
-       done <"$CTDB_MY_PUBLIC_IPS_CACHE" |
+       done <"${statd_new_ips_file}" |
                while read -r _f; do
                        if [ ! -f "$_f" ]; then
                                continue
@@ -379,6 +401,11 @@ startup()
 
        mkdir -p "$statd_callout_state_dir"
 
+       # Create an empty file.  Some of the code that processes the
+       # file can't cope with a missing file, which can happen if a
+       # node doesn't take any IPs between takeover runs.
+       : >"${statd_new_ips_file}"
+
        "startup_${statd_callout_mode}" "$_config_file"
 }
 
@@ -424,6 +451,11 @@ update)
        update
        ;;
 
+takeip)
+       _ip_addr=$2
+       save_ip "$_ip_addr"
+       ;;
+
 notify)
        # we must restart the lockmanager (on all nodes) so that we get
        # a clusterwide grace period (so other clients don't take out
@@ -437,6 +469,8 @@ notify)
 
        statd_state="${statd_callout_state_dir}/.statd_state"
        list_records >"$statd_state"
+       # Empty the file but don't remove it - see comment in startup()
+       : >"${statd_new_ips_file}"
 
        if [ ! -s "$statd_state" ]; then
                rm -f "$statd_state"