]> git.ipfire.org Git - thirdparty/samba.git/commitdiff
ctdb-scripts: Support storing statd-callout state in cluster filesystem
authorMartin Schwenke <mschwenke@ddn.com>
Wed, 28 Jun 2023 04:01:44 +0000 (14:01 +1000)
committerAmitay Isaacs <amitay@samba.org>
Fri, 13 Dec 2024 13:57:32 +0000 (13:57 +0000)
CTDB_STATD_CALLOUT_SHARED_STORAGE is a new configuration variable
indicating where statd-callout should store its NFS client locking
data.  See the update to ctdb-script.options(5) for details.

This adds back functionality that was removed in commit
12cc82623150ca4a83482f1b7165401cbdecd3de.  The commit message doesn't
say why this was changed but it was most likely due to a cluster
filesystem hanging at inopportune times.  Hence, this is re-added as a
non-default option.  There are 2 justifications for re-adding it:

* The existing method (persistent_db) relies on dequeuing data during
  the monitor event, which loses any queued data on node crash.

* NFS-Ganesha writes NFSv4 client locking data to a cluster
  filesystem, by default.  Something similar might as well exist for
  NFSv3.

Note that this could create the files for sm-notify in add-client.
However, this would require an alternate implementation of
send_notifies() (or a change to the implementation for persistent_db
too).  It seems better to leave add-client lightweight and do the work
in notify, since add-client is a more frequent operation.

Unconditionally create the state directory on startup.  This is
currently implicitly created for persistent_db when the queue
directory is created.  However, it isn't created anywhere else for
shared_dir, so do it in a common place.

In test mode, the shared storage location has a prefix added so files
are created within the test environment.

Signed-off-by: Martin Schwenke <mschwenke@ddn.com>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
ctdb/doc/ctdb-script.options.5.xml
ctdb/failover/statd_callout.c
ctdb/tools/statd_callout_helper

index 6df82227989833b3366bee4c51d78b70e334244f..acfb9d082f87812050d33406af58cabded712ed4 100644 (file)
@@ -1041,6 +1041,65 @@ CTDB_PER_IP_ROUTING_TABLE_ID_HIGH=9000
          </listitem>
        </varlistentry>
 
+       <varlistentry>
+         <term>
+           CTDB_STATD_CALLOUT_SHARED_STORAGE=<parameter>LOCATION</parameter>
+         </term>
+         <listitem>
+           <para>
+             LOCATION where NFSv3 statd state will be stored.  Valid
+             values are:
+           </para>
+           <variablelist>
+             <varlistentry>
+               <term>
+                 persistent_db<optional>:<parameter>TDB</parameter></optional>
+               </term>
+               <listitem>
+                 <para>
+                   Data is queued to local storage and then dequeued
+                   to TDB during monitor events.  This means there is
+                   a window where locking state may be lost.
+                   However, this works around performance limitations
+                   in CTDB's persistent database handling.
+                 </para>
+                 <para>
+                   If :TDB is omitted then TDB defaults to
+                   <filename>ctdb.tdb</filename>.
+                 </para>
+               </listitem>
+             </varlistentry>
+             <varlistentry>
+               <term>
+                 shared_dir<optional>:<parameter>DIRECTORY</parameter></optional>
+               </term>
+               <listitem>
+                 <para>
+                   DIRECTORY is a directory in a cluster filesystem
+                   that is shared between the nodes.  If DIRECTORY is
+                   relative (i.e. does not start with '/') then it is
+                   appended to CTDB_NFS_SHARED_STATE_DIR.  If
+                   :DIRECTORY is omitted then DIRECTORY defaults to
+                   <filename>statd</filename>.
+                 </para>
+                 <para>
+                   Using a shared directory may result in performance
+                   and/or stability problems.  rpc.statd is
+                   single-threaded and its HA callout is called
+                   synchronously, causing any latency introduced by
+                   the callout to be cumulative.  Stability issues
+                   are most likely if thousands of clients reclaim
+                   locks after failover and use of the cluster
+                   filesystem introduces too much additional
+                   latency.  Too much latency in in the HA callout
+                   may cause rpc.statd to fail health monitoring.
+                 </para>
+               </listitem>
+             </varlistentry>
+           </variablelist>
+         </listitem>
+       </varlistentry>
+
       </variablelist>
 
     </refsect2>
index a02a624aedfcd22ad6878f3ebb99d149966779c3..6123316c6a74c024112954430addafd3bfab3f36 100644 (file)
@@ -35,8 +35,9 @@
  * The first line is the mode.  Currently supported modes are:
  *
  *   persistent_db
+ *   shared_dir
  *
- * In this mode, the file contains 2 subsequent lines of text:
+ * In these modes, the file contains 2 subsequent lines of text:
  *
  *   path: directory where files should be created
  *   ips_file: file containing node's currently assigned public IP addresses
@@ -48,6 +49,7 @@ static const char *progname;
 struct {
        enum {
                CTDB_SC_MODE_PERSISTENT_DB,
+               CTDB_SC_MODE_SHARED_DIR,
        } mode;
        union {
                struct {
@@ -87,6 +89,7 @@ static void free_config(void)
 {
        switch (config.mode) {
        case CTDB_SC_MODE_PERSISTENT_DB:
+       case CTDB_SC_MODE_SHARED_DIR:
                free(config.path);
                config.path = NULL;
                free(config.ips_file);
@@ -127,6 +130,8 @@ static void read_config(void)
        }
        if (strcmp(mode, "persistent_db") == 0) {
                config.mode = CTDB_SC_MODE_PERSISTENT_DB;
+       } else if (strcmp(mode, "shared_dir") == 0) {
+               config.mode = CTDB_SC_MODE_SHARED_DIR;
        } else {
                fprintf(stderr,
                        "%s: unknown mode=%s in %s\n",
@@ -140,6 +145,7 @@ static void read_config(void)
 
        switch (config.mode) {
        case CTDB_SC_MODE_PERSISTENT_DB:
+       case CTDB_SC_MODE_SHARED_DIR:
                status = getline_strip(&config.path, &n, f);
                if (!status) {
                        goto parse_error;
@@ -288,6 +294,51 @@ static void del_client_persistent_db(const char *cip)
        for_each_sip(del_client_persistent_db_line, cip);
 }
 
+static void add_client_shared_dir_line(const char *sip, const char *cip)
+{
+       char path[PATH_MAX];
+       FILE *f;
+
+       make_path(path, sizeof(path), sip, cip);
+
+       f = fopen(path, "w");
+       if (f == NULL) {
+               fprintf(stderr,
+                       "%s: unable to open for writing %s\n",
+                       progname,
+                       path);
+               exit(1);
+       }
+       fclose(f);
+}
+
+static void add_client_shared_dir(const char *cip)
+{
+       for_each_sip(add_client_shared_dir_line, cip);
+}
+
+static void del_client_shared_dir_line(const char *sip, const char *cip)
+{
+       char path[PATH_MAX];
+       int ret;
+
+       make_path(path, sizeof(path), sip, cip);
+
+       ret = unlink(path);
+       if (ret != 0) {
+               fprintf(stderr,
+                       "%s: unable to remove %s\n",
+                       progname,
+                       path);
+               exit(1);
+       }
+}
+
+static void del_client_shared_dir(const char *cip)
+{
+       for_each_sip(del_client_shared_dir_line, cip);
+}
+
 static void usage(void)
 {
        printf("usage: %s: { add-client | del-client } <client-ip>\n", progname);
@@ -313,6 +364,9 @@ int main(int argc, const char *argv[])
                case CTDB_SC_MODE_PERSISTENT_DB:
                        add_client_persistent_db(mon_name);
                        break;
+               case CTDB_SC_MODE_SHARED_DIR:
+                       add_client_shared_dir(mon_name);
+                       break;
                }
        } else if (strcmp(event, "del-client") == 0) {
                mon_name = argv[2];
@@ -320,6 +374,9 @@ int main(int argc, const char *argv[])
                case CTDB_SC_MODE_PERSISTENT_DB:
                        del_client_persistent_db(mon_name);
                        break;
+               case CTDB_SC_MODE_SHARED_DIR:
+                       del_client_shared_dir(mon_name);
+                       break;
                }
        } else {
                usage();
index bc7a4bf284284cb1e713269ca4efc77fdc155cfd..0ef88f06c96e5b3f291066a0859584bd1efb4cf3 100755 (executable)
@@ -38,6 +38,8 @@ die()
        exit 1
 }
 
+load_script_options "service" "60.nfs"
+
 ############################################################
 
 ctdb_setup_state_dir "service" "nfs"
@@ -79,9 +81,46 @@ create_add_del_client_dir()
 # shellcheck disable=SC2154
 statd_callout_state_dir="${script_state_dir}/statd_callout"
 
-statd_callout_mode="persistent_db"
-statd_callout_db="ctdb.tdb"
-statd_callout_queue_dir="${statd_callout_state_dir}/queue"
+# Set default value, if necessary
+: "${CTDB_STATD_CALLOUT_SHARED_STORAGE:=persistent_db}"
+
+statd_callout_mode="${CTDB_STATD_CALLOUT_SHARED_STORAGE%%:*}"
+statd_callout_location="${CTDB_STATD_CALLOUT_SHARED_STORAGE#*:}"
+# If not given then mode determines the default location
+if [ "$statd_callout_location" = "$CTDB_STATD_CALLOUT_SHARED_STORAGE" ]; then
+       statd_callout_location=""
+fi
+
+case "$statd_callout_mode" in
+persistent_db)
+       statd_callout_db="${statd_callout_location:-ctdb.tdb}"
+       statd_callout_queue_dir="${statd_callout_state_dir}/queue"
+       ;;
+shared_dir)
+       statd_callout_shared_dir="${statd_callout_location:-statd}"
+       case "$statd_callout_shared_dir" in
+       /*)
+               :
+               ;;
+       *)
+               if [ -z "$CTDB_NFS_SHARED_STATE_DIR" ]; then
+                       die "CTDB_NFS_SHARED_STATE_DIR is not set"
+               fi
+               t="${CTDB_NFS_SHARED_STATE_DIR}/${statd_callout_shared_dir}"
+               statd_callout_shared_dir="$t"
+               ;;
+       esac
+
+       if [ -n "$CTDB_TEST_MODE" ]; then
+               t="${CTDB_TEST_TMP_DIR}${statd_callout_shared_dir}"
+               statd_callout_shared_dir="$t"
+       fi
+       ;;
+*)
+       mode="$statd_callout_mode"
+       die "error: unknown CTDB_STATD_CALLOUT_SHARED_STORAGE mode ${mode}"
+       ;;
+esac
 
 ############################################################
 
@@ -241,12 +280,69 @@ cleanup_persistent_db()
 
 ############################################################
 
+# Use file/key names of the form statd-state@<server-IP>@<client-IP>
+# to track the "add-client" and "del-client".  statd_callout add and
+# removes files directly in $statd_callout_shared_dir.  This may
+# result in performance problems if thousands of clients reclaim locks
+# after failover and the cluster filesystem is unable to handle the
+# load.
+
+startup_shared_dir()
+{
+       _config_file="$1"
+
+       create_add_del_client_dir "$statd_callout_shared_dir"
+
+       cat >"$_config_file" <<EOF
+shared_dir
+${statd_callout_shared_dir}
+${CTDB_MY_PUBLIC_IPS_CACHE}
+EOF
+}
+
+update_shared_dir()
+{
+       :
+}
+
+list_records_shared_dir()
+{
+       while read -r _ip; do
+               ls "${statd_callout_shared_dir}/statd-state@${_ip}@"*
+       done <"$CTDB_MY_PUBLIC_IPS_CACHE" |
+               while read -r _f; do
+                       if [ ! -f "$_f" ]; then
+                               continue
+                       fi
+                       _t="${_f#"${statd_callout_shared_dir}/statd-state@"}"
+                       _sip="${_t%@*}"
+                       _cip="${_t#*@}"
+                       echo "$_sip" "$_cip"
+               done
+}
+
+delete_records_shared_dir()
+{
+       while read -r _sip _cip; do
+               echo "${statd_callout_shared_dir}/statd-state@${_sip}@${_cip}"
+       done | xargs rm -f
+}
+
+cleanup_shared_dir()
+{
+       :
+}
+
+############################################################
+
 # Per-mode initialisation
 startup()
 {
        _default="${CTDB_SCRIPT_VARDIR}/statd_callout.conf"
        _config_file="${CTDB_STATD_CALLOUT_CONFIG_FILE:-"${_default}"}"
 
+       mkdir -p "$statd_callout_state_dir"
+
        "startup_${statd_callout_mode}" "$_config_file"
 }