From 08310072aad7b80f180d42292cf7eeaa773dd1b8 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Wed, 28 Jun 2023 14:01:44 +1000 Subject: [PATCH] ctdb-scripts: Support storing statd-callout state in cluster filesystem CTDB_STATD_CALLOUT_SHARED_STORAGE is a new configuration variable indicating where statd-callout should store its NFS client locking data. See the update to ctdb-script.options(5) for details. This adds back functionality that was removed in commit 12cc82623150ca4a83482f1b7165401cbdecd3de. The commit message doesn't say why this was changed but it was most likely due to a cluster filesystem hanging at inopportune times. Hence, this is re-added as a non-default option. There are 2 justifications for re-adding it: * The existing method (persistent_db) relies on dequeuing data during the monitor event, which loses any queued data on node crash. * NFS-Ganesha writes NFSv4 client locking data to a cluster filesystem, by default. Something similar might as well exist for NFSv3. Note that this could create the files for sm-notify in add-client. However, this would require an alternate implementation of send_notifies() (or a change to the implementation for persistent_db too). It seems better to leave add-client lightweight and do the work in notify, since add-client is a more frequent operation. Unconditionally create the state directory on startup. This is currently implicitly created for persistent_db when the queue directory is created. However, it isn't created anywhere else for shared_dir, so do it in a common place. In test mode, the shared storage location has a prefix added so files are created within the test environment. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs --- ctdb/doc/ctdb-script.options.5.xml | 59 +++++++++++++++++ ctdb/failover/statd_callout.c | 59 ++++++++++++++++- ctdb/tools/statd_callout_helper | 102 ++++++++++++++++++++++++++++- 3 files changed, 216 insertions(+), 4 deletions(-) diff --git a/ctdb/doc/ctdb-script.options.5.xml b/ctdb/doc/ctdb-script.options.5.xml index 6df82227989..acfb9d082f8 100644 --- a/ctdb/doc/ctdb-script.options.5.xml +++ b/ctdb/doc/ctdb-script.options.5.xml @@ -1041,6 +1041,65 @@ CTDB_PER_IP_ROUTING_TABLE_ID_HIGH=9000 + + + CTDB_STATD_CALLOUT_SHARED_STORAGE=LOCATION + + + + LOCATION where NFSv3 statd state will be stored. Valid + values are: + + + + + persistent_db:TDB + + + + Data is queued to local storage and then dequeued + to TDB during monitor events. This means there is + a window where locking state may be lost. + However, this works around performance limitations + in CTDB's persistent database handling. + + + If :TDB is omitted then TDB defaults to + ctdb.tdb. + + + + + + shared_dir:DIRECTORY + + + + DIRECTORY is a directory in a cluster filesystem + that is shared between the nodes. If DIRECTORY is + relative (i.e. does not start with '/') then it is + appended to CTDB_NFS_SHARED_STATE_DIR. If + :DIRECTORY is omitted then DIRECTORY defaults to + statd. + + + Using a shared directory may result in performance + and/or stability problems. rpc.statd is + single-threaded and its HA callout is called + synchronously, causing any latency introduced by + the callout to be cumulative. Stability issues + are most likely if thousands of clients reclaim + locks after failover and use of the cluster + filesystem introduces too much additional + latency. Too much latency in in the HA callout + may cause rpc.statd to fail health monitoring. + + + + + + + diff --git a/ctdb/failover/statd_callout.c b/ctdb/failover/statd_callout.c index a02a624aedf..6123316c6a7 100644 --- a/ctdb/failover/statd_callout.c +++ b/ctdb/failover/statd_callout.c @@ -35,8 +35,9 @@ * The first line is the mode. Currently supported modes are: * * persistent_db + * shared_dir * - * In this mode, the file contains 2 subsequent lines of text: + * In these modes, the file contains 2 subsequent lines of text: * * path: directory where files should be created * ips_file: file containing node's currently assigned public IP addresses @@ -48,6 +49,7 @@ static const char *progname; struct { enum { CTDB_SC_MODE_PERSISTENT_DB, + CTDB_SC_MODE_SHARED_DIR, } mode; union { struct { @@ -87,6 +89,7 @@ static void free_config(void) { switch (config.mode) { case CTDB_SC_MODE_PERSISTENT_DB: + case CTDB_SC_MODE_SHARED_DIR: free(config.path); config.path = NULL; free(config.ips_file); @@ -127,6 +130,8 @@ static void read_config(void) } if (strcmp(mode, "persistent_db") == 0) { config.mode = CTDB_SC_MODE_PERSISTENT_DB; + } else if (strcmp(mode, "shared_dir") == 0) { + config.mode = CTDB_SC_MODE_SHARED_DIR; } else { fprintf(stderr, "%s: unknown mode=%s in %s\n", @@ -140,6 +145,7 @@ static void read_config(void) switch (config.mode) { case CTDB_SC_MODE_PERSISTENT_DB: + case CTDB_SC_MODE_SHARED_DIR: status = getline_strip(&config.path, &n, f); if (!status) { goto parse_error; @@ -288,6 +294,51 @@ static void del_client_persistent_db(const char *cip) for_each_sip(del_client_persistent_db_line, cip); } +static void add_client_shared_dir_line(const char *sip, const char *cip) +{ + char path[PATH_MAX]; + FILE *f; + + make_path(path, sizeof(path), sip, cip); + + f = fopen(path, "w"); + if (f == NULL) { + fprintf(stderr, + "%s: unable to open for writing %s\n", + progname, + path); + exit(1); + } + fclose(f); +} + +static void add_client_shared_dir(const char *cip) +{ + for_each_sip(add_client_shared_dir_line, cip); +} + +static void del_client_shared_dir_line(const char *sip, const char *cip) +{ + char path[PATH_MAX]; + int ret; + + make_path(path, sizeof(path), sip, cip); + + ret = unlink(path); + if (ret != 0) { + fprintf(stderr, + "%s: unable to remove %s\n", + progname, + path); + exit(1); + } +} + +static void del_client_shared_dir(const char *cip) +{ + for_each_sip(del_client_shared_dir_line, cip); +} + static void usage(void) { printf("usage: %s: { add-client | del-client } \n", progname); @@ -313,6 +364,9 @@ int main(int argc, const char *argv[]) case CTDB_SC_MODE_PERSISTENT_DB: add_client_persistent_db(mon_name); break; + case CTDB_SC_MODE_SHARED_DIR: + add_client_shared_dir(mon_name); + break; } } else if (strcmp(event, "del-client") == 0) { mon_name = argv[2]; @@ -320,6 +374,9 @@ int main(int argc, const char *argv[]) case CTDB_SC_MODE_PERSISTENT_DB: del_client_persistent_db(mon_name); break; + case CTDB_SC_MODE_SHARED_DIR: + del_client_shared_dir(mon_name); + break; } } else { usage(); diff --git a/ctdb/tools/statd_callout_helper b/ctdb/tools/statd_callout_helper index bc7a4bf2842..0ef88f06c96 100755 --- a/ctdb/tools/statd_callout_helper +++ b/ctdb/tools/statd_callout_helper @@ -38,6 +38,8 @@ die() exit 1 } +load_script_options "service" "60.nfs" + ############################################################ ctdb_setup_state_dir "service" "nfs" @@ -79,9 +81,46 @@ create_add_del_client_dir() # shellcheck disable=SC2154 statd_callout_state_dir="${script_state_dir}/statd_callout" -statd_callout_mode="persistent_db" -statd_callout_db="ctdb.tdb" -statd_callout_queue_dir="${statd_callout_state_dir}/queue" +# Set default value, if necessary +: "${CTDB_STATD_CALLOUT_SHARED_STORAGE:=persistent_db}" + +statd_callout_mode="${CTDB_STATD_CALLOUT_SHARED_STORAGE%%:*}" +statd_callout_location="${CTDB_STATD_CALLOUT_SHARED_STORAGE#*:}" +# If not given then mode determines the default location +if [ "$statd_callout_location" = "$CTDB_STATD_CALLOUT_SHARED_STORAGE" ]; then + statd_callout_location="" +fi + +case "$statd_callout_mode" in +persistent_db) + statd_callout_db="${statd_callout_location:-ctdb.tdb}" + statd_callout_queue_dir="${statd_callout_state_dir}/queue" + ;; +shared_dir) + statd_callout_shared_dir="${statd_callout_location:-statd}" + case "$statd_callout_shared_dir" in + /*) + : + ;; + *) + if [ -z "$CTDB_NFS_SHARED_STATE_DIR" ]; then + die "CTDB_NFS_SHARED_STATE_DIR is not set" + fi + t="${CTDB_NFS_SHARED_STATE_DIR}/${statd_callout_shared_dir}" + statd_callout_shared_dir="$t" + ;; + esac + + if [ -n "$CTDB_TEST_MODE" ]; then + t="${CTDB_TEST_TMP_DIR}${statd_callout_shared_dir}" + statd_callout_shared_dir="$t" + fi + ;; +*) + mode="$statd_callout_mode" + die "error: unknown CTDB_STATD_CALLOUT_SHARED_STORAGE mode ${mode}" + ;; +esac ############################################################ @@ -241,12 +280,69 @@ cleanup_persistent_db() ############################################################ +# Use file/key names of the form statd-state@@ +# to track the "add-client" and "del-client". statd_callout add and +# removes files directly in $statd_callout_shared_dir. This may +# result in performance problems if thousands of clients reclaim locks +# after failover and the cluster filesystem is unable to handle the +# load. + +startup_shared_dir() +{ + _config_file="$1" + + create_add_del_client_dir "$statd_callout_shared_dir" + + cat >"$_config_file" <