git.ipfire.org Git - thirdparty/samba.git/commitdiff
ctdb-scripts: Add configuration variable CTDB_KILLTCP_USE_SS_KILL
author Martin Schwenke <mschwenke@ddn.com>
Tue, 22 Aug 2023 02:13:44 +0000 (12:13 +1000)
committer Martin Schwenke <martins@samba.org>
Thu, 7 Nov 2024 00:12:34 +0000 (00:12 +0000)
This allows CTDB to be configured to use "ss -K" to reset TCP
connections on "releaseip".  This is only supported when the kernel is
configured with CONFIG_INET_DIAG_DESTROY enabled.

From the documentation:

   ss -K has been supported in ss since iproute 4.5 in March 2016 and
   in the Linux kernel since 4.4 in December 2015.  However, the
   required kernel configuration item CONFIG_INET_DIAG_DESTROY is
   disabled by default.  Although enabled in Debian kernels since
   ~2017 and in Ubuntu since at least 18.04, this has only recently
   been enabled in distributions such as RHEL.  There seems to be no
   way, including running ss -K, to determine if this is supported, so
   use of this feature needs to be configurable.  When available, it
   should be the fastest, most reliable way of killing connections.

For RHEL and derivatives, this was enabled as follows:

* RHEL 8 via https://bugzilla.redhat.com/show_bug.cgi?id=2230213,
  arriving in version kernel-4.18.0-513.5.1.el8_9

* RHEL 9 via https://issues.redhat.com/browse/RHEL-212, arriving in
  kernel-5.14.0-360.el9

Enabling this option results in a small behaviour change because ss -K
always does a 2-way kill (i.e. it also sends a RST to the client).
Only a 1-way kill is done for SMB connections when ctdb_killtcp is
used - the reasons for this are shrouded in history and the 2-way kill
seems to work fine.

For the summary that is logged, when CTDB_KILLTCP_USE_SS_KILL is "yes"
or "try", always log the method used, even the fallback to
ctdb_killtcp.  However, when set to "no", maintain the existing
output.

The decision to use -K rather than --kill is because short options are
trivial to implement in test stubs.

Signed-off-by: Martin Schwenke <mschwenke@ddn.com>
Reviewed-by: Volker Lendecke <vl@samba.org>
Reviewed-by: Jerry Heyman <jheyman@ddn.com>
Autobuild-User(master): Martin Schwenke <martins@samba.org>
Autobuild-Date(master): Thu Nov  7 00:12:34 UTC 2024 on atb-devel-224

ctdb/config/functions
ctdb/doc/ctdb-script.options.5.xml
ctdb/tests/UNIT/eventscripts/10.interface.020.sh [new file with mode: 0755]
ctdb/tests/UNIT/eventscripts/10.interface.021.sh [new file with mode: 0755]
ctdb/tests/UNIT/eventscripts/10.interface.022.sh [new file with mode: 0755]
ctdb/tests/UNIT/eventscripts/10.interface.023.sh [new file with mode: 0755]
ctdb/tests/UNIT/eventscripts/10.interface.030.sh [new file with mode: 0755]
ctdb/tests/UNIT/eventscripts/10.interface.031.sh [new file with mode: 0755]
ctdb/tests/UNIT/eventscripts/10.interface.032.sh [new file with mode: 0755]
ctdb/tests/UNIT/eventscripts/10.interface.033.sh [new file with mode: 0755]
ctdb/tests/UNIT/eventscripts/stubs/ss

index d1579e5156c3e00ea1d5bc90a3a9c7abf86bc85a..1ca3cebbbca0e9a3fe2246026e09227806559ed0 100755 (executable)
@@ -514,6 +514,7 @@ kill_tcp_summarise()
 {
        _mode="$1"
        _count="$2"
+       _method="$3"
 
        _connections=$(get_tcp_connections_for_ip "$_ip")
        if [ -z "$_connections" ]; then
@@ -534,7 +535,11 @@ kill_tcp_summarise()
        esac
 
        _t="${_killed}/${_total}"
-       echo "Killed ${_t} TCP connections to released IP ${_ip}"
+       _m=""
+       if [ -n "$_method" ]; then
+               _m=", using ${_method}"
+       fi
+       echo "Killed ${_t} TCP connections to released IP ${_ip}${_m}"
        if [ -n "$_connections" ]; then
                echo "Remaining connections:"
                echo "$_connections" | sed -e 's|^|  |'
@@ -551,6 +556,16 @@ kill_tcp_connections()
                _oneway=true
        fi
 
+       case "$CTDB_KILLTCP_USE_SS_KILL" in
+       yes | try)
+               _killcount=$(ss -K -tnH state established src "$_ip" | wc -l)
+               kill_tcp_summarise "killed" "$_killcount" "ss -K"
+               if [ "$CTDB_KILLTCP_USE_SS_KILL" = "yes" ]; then
+                       return
+               fi
+               ;;
+       esac
+
        get_tcp_connections_for_ip "$_ip" | {
                _killcount=0
                _connections=""
@@ -588,7 +603,11 @@ kill_tcp_connections()
                        return
                }
 
-               kill_tcp_summarise "total" "$_killcount"
+               _method=""
+               if [ "$CTDB_KILLTCP_USE_SS_KILL" = "try" ]; then
+                       _method="ctdb_killtcp"
+               fi
+               kill_tcp_summarise "total" "$_killcount" "$_method"
        }
 }
 
index a0122c0e77e997c5f5bef0a265ac1f2f40c9598a..9298f9f3498a16ab8e95a3a01f80acf4f47d413e 100644 (file)
 
       <variablelist>
 
+       <varlistentry>
+         <term>
+           CTDB_KILLTCP_USE_SS_KILL=yes|try|no
+         </term>
+         <listitem>
+           <para>
+             Whether to use <command>ss -K/--kill</command> to reset
+             incoming TCP connections to public IP addresses during
+             <command>releaseip</command>.
+           </para>
+
+           <para>
+             CTDB's standard method of resetting incoming TCP
+             connections during <command>releaseip</command> is via
+             its custom <command>ctdb_killtcp</command> command.
+             This uses network trickery to reset each connection:
+             send a "tickle ACK", capture the reply to extract the
+             TCP sequence number, send a reset (containing the
+             correct sequence number).
+           </para>
+
+           <para>
+             <command>ss -K</command> has been supported in
+             <command>ss</command> since iproute 4.5 in March 2016
+             and in the Linux kernel since 4.4 in December 2015.
+             However, the required kernel configuration item
+             <code>CONFIG_INET_DIAG_DESTROY</code> is disabled by
+             default.  Although enabled in Debian kernels since ~2017
+             and in Ubuntu since at least 18.04, this has only
+             recently been enabled in distributions such as RHEL.
+             There seems to be no way, including running <command>ss
+             -K</command>, to determine if this is supported, so use
+             of this feature needs to be configurable.  When
+             available, it should be the fastest, most reliable way
+             of killing connections.
+           </para>
+
+           <para>
+             Supported values are:
+             <variablelist>
+               <varlistentry>
+                 <term>
+                   yes
+                 </term>
+                 <listitem>
+                   <para>
+                     Use <command>ss -K</command> and make no other
+                     attempt to kill any remaining connections.  This
+                     is sane on modern Linux distributions that are
+                     guaranteed to have
+                     <code>CONFIG_INET_DIAG_DESTROY</code> enabled.
+                   </para>
+                 </listitem>
+               </varlistentry>
+
+               <varlistentry>
+                 <term>
+                   try
+                 </term>
+                 <listitem>
+                   <para>
+                     Attempt to use <command>ss -K</command> and fall
+                     back to <command>ctdb_killtcp</command> for any
+                     remaining connections.  This may be a good value
+                     when <command>ss</command> supports the
+                     <command>-K</command> option but it is uncertain
+                     whether <code>CONFIG_INET_DIAG_DESTROY</code> is
+                     enabled.
+                   </para>
+                 </listitem>
+               </varlistentry>
+
+               <varlistentry>
+                 <term>
+                   no
+                 </term>
+                 <listitem>
+                   <para>
+                     Never attempt to use <command>ss -K</command>.
+                     Rely only on <command>ctdb_killtcp</command>.
+                   </para>
+                 </listitem>
+               </varlistentry>
+             </variablelist>
+             Default is "no".
+           </para>
+         </listitem>
+       </varlistentry>
+
        <varlistentry>
          <term>
            CTDB_PARTIALLY_ONLINE_INTERFACES=yes|no
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.020.sh b/ctdb/tests/UNIT/eventscripts/10.interface.020.sh
new file mode 100755 (executable)
index 0000000..89e6c9a
--- /dev/null
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Release 1 IP, 10 connections killed OK, use ss -K"
+
+setup
+
+setup_script_options <<EOF
+CTDB_KILLTCP_USE_SS_KILL="yes"
+EOF
+
+ctdb_get_1_public_address |
+       while read -r dev ip bits; do
+               ok_null
+               simple_test_event "takeip" "$dev" "$ip" "$bits"
+
+               count=10
+               setup_tcp_connections $count \
+                       "$ip" 445 10.254.254.0 12300
+
+               ok <<EOF
+Killed ${count}/${count} TCP connections to released IP ${ip}, using ss -K
+EOF
+
+               simple_test_event "releaseip" "$dev" "$ip" "$bits"
+       done
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.021.sh b/ctdb/tests/UNIT/eventscripts/10.interface.021.sh
new file mode 100755 (executable)
index 0000000..7018c1a
--- /dev/null
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Release 1 IP, 10 connections killed, 1 fails, use ss -K"
+
+setup
+
+setup_script_options <<EOF
+CTDB_KILLTCP_USE_SS_KILL="yes"
+EOF
+
+ctdb_get_1_public_address |
+       while read -r dev ip bits; do
+               ok_null
+               simple_test_event "takeip" "$dev" "$ip" "$bits"
+
+               count=10
+               setup_tcp_connections $count \
+                       "$ip" 445 10.254.254.0 12300
+
+               setup_tcp_connections_unkillable 1 \
+                       "$ip" 445 10.254.254.0 43210
+
+               ok <<EOF
+Killed 10/11 TCP connections to released IP ${ip}, using ss -K
+Remaining connections:
+  ${ip}:445 10.254.254.1:43211
+EOF
+
+               simple_test_event "releaseip" "$dev" "$ip" "$bits"
+       done
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.022.sh b/ctdb/tests/UNIT/eventscripts/10.interface.022.sh
new file mode 100755 (executable)
index 0000000..015dccf
--- /dev/null
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Release 1 IP, 10 connections killed, 3 fail, use ss -K"
+
+setup
+
+setup_script_options <<EOF
+CTDB_KILLTCP_USE_SS_KILL="yes"
+EOF
+
+ctdb_get_1_public_address |
+       while read -r dev ip bits; do
+               ok_null
+               simple_test_event "takeip" "$dev" "$ip" "$bits"
+
+               count=10
+
+               setup_tcp_connections $count \
+                       "$ip" 445 10.254.254.0 12300
+
+               setup_tcp_connections_unkillable 3 \
+                       "$ip" 445 10.254.254.0 43210
+
+               ok <<EOF
+Killed 10/13 TCP connections to released IP ${ip}, using ss -K
+Remaining connections:
+  ${ip}:445 10.254.254.1:43211
+  ${ip}:445 10.254.254.2:43212
+  ${ip}:445 10.254.254.3:43213
+EOF
+
+               simple_test_event "releaseip" "$dev" "$ip" "$bits"
+       done
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.023.sh b/ctdb/tests/UNIT/eventscripts/10.interface.023.sh
new file mode 100755 (executable)
index 0000000..b46089a
--- /dev/null
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Release 1 IP, all 10 connections kills fail, use ss -K"
+
+setup
+
+setup_script_options <<EOF
+CTDB_KILLTCP_USE_SS_KILL="yes"
+EOF
+
+ctdb_get_1_public_address |
+       while read -r dev ip bits; do
+               ok_null
+               simple_test_event "takeip" "$dev" "$ip" "$bits"
+
+               setup_tcp_connections 0
+
+               count=10
+               setup_tcp_connections_unkillable $count \
+                       "$ip" 445 10.254.254.0 43210
+
+               ok <<EOF
+Killed 0/${count} TCP connections to released IP ${ip}, using ss -K
+Remaining connections:
+  ${ip}:445 10.254.254.1:43211
+  ${ip}:445 10.254.254.2:43212
+  ${ip}:445 10.254.254.3:43213
+  ${ip}:445 10.254.254.4:43214
+  ${ip}:445 10.254.254.5:43215
+  ${ip}:445 10.254.254.6:43216
+  ${ip}:445 10.254.254.7:43217
+  ${ip}:445 10.254.254.8:43218
+  ${ip}:445 10.254.254.9:43219
+  ${ip}:445 10.254.254.10:43220
+EOF
+
+               simple_test_event "releaseip" "$dev" "$ip" "$bits"
+       done
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.030.sh b/ctdb/tests/UNIT/eventscripts/10.interface.030.sh
new file mode 100755 (executable)
index 0000000..0961d5c
--- /dev/null
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Release 1 IP, 10 connections killed OK, try ss -K"
+
+setup
+
+setup_script_options <<EOF
+CTDB_KILLTCP_USE_SS_KILL="try"
+EOF
+
+ctdb_get_1_public_address |
+       while read -r dev ip bits; do
+               ok_null
+               simple_test_event "takeip" "$dev" "$ip" "$bits"
+
+               count=10
+               setup_tcp_connections $count \
+                       "$ip" 445 10.254.254.0 12300
+
+               ok <<EOF
+Killed ${count}/${count} TCP connections to released IP ${ip}, using ss -K
+EOF
+
+               simple_test_event "releaseip" "$dev" "$ip" "$bits"
+       done
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.031.sh b/ctdb/tests/UNIT/eventscripts/10.interface.031.sh
new file mode 100755 (executable)
index 0000000..65acec5
--- /dev/null
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Release 1 IP, 10 connections killed, 1 fails, try ss -K"
+
+setup
+
+setup_script_options <<EOF
+CTDB_KILLTCP_USE_SS_KILL="try"
+EOF
+
+ctdb_get_1_public_address |
+       while read -r dev ip bits; do
+               ok_null
+               simple_test_event "takeip" "$dev" "$ip" "$bits"
+
+               count=10
+               setup_tcp_connections $count \
+                       "$ip" 445 10.254.254.0 12300
+
+               setup_tcp_connections_unkillable 1 \
+                       "$ip" 445 10.254.254.0 43210
+
+               ok <<EOF
+Killed 10/11 TCP connections to released IP ${ip}, using ss -K
+Remaining connections:
+  ${ip}:445 10.254.254.1:43211
+Killed 0/1 TCP connections to released IP ${ip}, using ctdb_killtcp
+Remaining connections:
+  ${ip}:445 10.254.254.1:43211
+EOF
+
+               simple_test_event "releaseip" "$dev" "$ip" "$bits"
+       done
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.032.sh b/ctdb/tests/UNIT/eventscripts/10.interface.032.sh
new file mode 100755 (executable)
index 0000000..0bc9924
--- /dev/null
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Release 1 IP, 10 connections killed, 3 fail, try ss -K"
+
+setup
+
+setup_script_options <<EOF
+CTDB_KILLTCP_USE_SS_KILL="try"
+EOF
+
+ctdb_get_1_public_address |
+       while read -r dev ip bits; do
+               ok_null
+               simple_test_event "takeip" "$dev" "$ip" "$bits"
+
+               count=10
+
+               setup_tcp_connections $count \
+                       "$ip" 445 10.254.254.0 12300
+
+               setup_tcp_connections_unkillable 3 \
+                       "$ip" 445 10.254.254.0 43210
+
+               ok <<EOF
+Killed 10/13 TCP connections to released IP ${ip}, using ss -K
+Remaining connections:
+  ${ip}:445 10.254.254.1:43211
+  ${ip}:445 10.254.254.2:43212
+  ${ip}:445 10.254.254.3:43213
+Killed 0/3 TCP connections to released IP ${ip}, using ctdb_killtcp
+Remaining connections:
+  ${ip}:445 10.254.254.1:43211
+  ${ip}:445 10.254.254.2:43212
+  ${ip}:445 10.254.254.3:43213
+EOF
+
+               simple_test_event "releaseip" "$dev" "$ip" "$bits"
+       done
diff --git a/ctdb/tests/UNIT/eventscripts/10.interface.033.sh b/ctdb/tests/UNIT/eventscripts/10.interface.033.sh
new file mode 100755 (executable)
index 0000000..c9298f0
--- /dev/null
@@ -0,0 +1,52 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Release 1 IP, all 10 connections kills fail, try ss -K"
+
+setup
+
+setup_script_options <<EOF
+CTDB_KILLTCP_USE_SS_KILL="try"
+EOF
+
+ctdb_get_1_public_address |
+       while read -r dev ip bits; do
+               ok_null
+               simple_test_event "takeip" "$dev" "$ip" "$bits"
+
+               setup_tcp_connections 0
+
+               count=10
+               setup_tcp_connections_unkillable $count \
+                       "$ip" 445 10.254.254.0 43210
+
+               ok <<EOF
+Killed 0/${count} TCP connections to released IP ${ip}, using ss -K
+Remaining connections:
+  ${ip}:445 10.254.254.1:43211
+  ${ip}:445 10.254.254.2:43212
+  ${ip}:445 10.254.254.3:43213
+  ${ip}:445 10.254.254.4:43214
+  ${ip}:445 10.254.254.5:43215
+  ${ip}:445 10.254.254.6:43216
+  ${ip}:445 10.254.254.7:43217
+  ${ip}:445 10.254.254.8:43218
+  ${ip}:445 10.254.254.9:43219
+  ${ip}:445 10.254.254.10:43220
+Killed 0/${count} TCP connections to released IP ${ip}, using ctdb_killtcp
+Remaining connections:
+  ${ip}:445 10.254.254.1:43211
+  ${ip}:445 10.254.254.2:43212
+  ${ip}:445 10.254.254.3:43213
+  ${ip}:445 10.254.254.4:43214
+  ${ip}:445 10.254.254.5:43215
+  ${ip}:445 10.254.254.6:43216
+  ${ip}:445 10.254.254.7:43217
+  ${ip}:445 10.254.254.8:43218
+  ${ip}:445 10.254.254.9:43219
+  ${ip}:445 10.254.254.10:43220
+EOF
+
+               simple_test_event "releaseip" "$dev" "$ip" "$bits"
+       done
index 692de2595a44d3dbd93dc691db7bc054ff2a304d..a7589992f35cdfc112cd4f1b2867e538c45ce906 100755 (executable)
@@ -108,22 +108,33 @@ ss_tcp_established()
        # shellcheck disable=SC2048,SC2086
        parse_filter $*
 
-       for i in $FAKE_NETSTAT_TCP_ESTABLISHED; do
-               src="${i%|*}"
-               dst="${i#*|}"
-               if filter_socket "$srcs" "$sports" "$src"; then
-                       echo 0 0 "$src" "$dst"
-               fi
-       done
+       if ! "$kill"; then
+               for i in $FAKE_NETSTAT_TCP_ESTABLISHED; do
+                       src="${i%|*}"
+                       dst="${i#*|}"
+                       if filter_socket "$srcs" "$sports" "$src"; then
+                               echo 0 0 "$src" "$dst"
+                       fi
+               done
+       fi
 
        if [ -z "$FAKE_NETSTAT_TCP_ESTABLISHED_FILE" ]; then
                return
        fi
+       new="${FAKE_NETSTAT_TCP_ESTABLISHED_FILE}.new"
+       : >"$new"
        while read -r src dst; do
                if filter_socket "$srcs" "$sports" "$src"; then
                        echo 0 0 "$src" "$dst"
+               else
+                       echo "${src} ${dst}" >>"$new"
                fi
        done <"$FAKE_NETSTAT_TCP_ESTABLISHED_FILE"
+       if "$kill"; then
+               mv "$new" "$FAKE_NETSTAT_TCP_ESTABLISHED_FILE"
+       else
+               rm "$new"
+       fi
 }
 
 ############################################################
@@ -160,16 +171,18 @@ unix=false
 all=false
 listen=false
 header=true
+kill=false
 
 orig="$*"
 
-while getopts "txnalHh?" opt; do
+while getopts "txnalHKh?" opt; do
        case "$opt" in
        t) tcp=true ;;
        x) unix=true ;;
        l) listen=true ;;
        a) all=true ;;
        H) header=false ;;
+       K) kill=true ;;
        n) : ;;
        \? | h) usage ;;
        esac