CTDB_STATD_CALLOUT_SHARED_STORAGE is a new configuration variable
indicating where statd-callout should store its NFS client locking
data. See the update to ctdb-script.options(5) for details.
This adds back functionality that was removed in commit
12cc82623150ca4a83482f1b7165401cbdecd3de. The commit message doesn't
say why this was changed but it was most likely due to a cluster
filesystem hanging at inopportune times. Hence, this is re-added as a
non-default option. There are 2 justifications for re-adding it:
* The existing method (persistent_db) relies on dequeuing data during
the monitor event, which loses any queued data on node crash.
* NFS-Ganesha writes NFSv4 client locking data to a cluster
filesystem, by default. Something similar might as well exist for
NFSv3.
Note that this could create the files for sm-notify in add-client.
However, this would require an alternate implementation of
send_notifies() (or a change to the implementation for persistent_db
too). It seems better to leave add-client lightweight and do the work
in notify, since add-client is a more frequent operation.
Unconditionally create the state directory on startup. This is
currently implicitly created for persistent_db when the queue
directory is created. However, it isn't created anywhere else for
shared_dir, so do it in a common place.
In test mode, the shared storage location has a prefix added so files
are created within the test environment.
Signed-off-by: Martin Schwenke <mschwenke@ddn.com>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ CTDB_STATD_CALLOUT_SHARED_STORAGE=<parameter>LOCATION</parameter>
+ </term>
+ <listitem>
+ <para>
+ LOCATION where NFSv3 statd state will be stored. Valid
+ values are:
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term>
+ persistent_db<optional>:<parameter>TDB</parameter></optional>
+ </term>
+ <listitem>
+ <para>
+ Data is queued to local storage and then dequeued
+ to TDB during monitor events. This means there is
+ a window where locking state may be lost.
+ However, this works around performance limitations
+ in CTDB's persistent database handling.
+ </para>
+ <para>
+ If :TDB is omitted then TDB defaults to
+ <filename>ctdb.tdb</filename>.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>
+ shared_dir<optional>:<parameter>DIRECTORY</parameter></optional>
+ </term>
+ <listitem>
+ <para>
+ DIRECTORY is a directory in a cluster filesystem
+ that is shared between the nodes. If DIRECTORY is
+ relative (i.e. does not start with '/') then it is
+ appended to CTDB_NFS_SHARED_STATE_DIR. If
+ :DIRECTORY is omitted then DIRECTORY defaults to
+ <filename>statd</filename>.
+ </para>
+ <para>
+ Using a shared directory may result in performance
+ and/or stability problems. rpc.statd is
+ single-threaded and its HA callout is called
+ synchronously, causing any latency introduced by
+ the callout to be cumulative. Stability issues
+ are most likely if thousands of clients reclaim
+ locks after failover and use of the cluster
+ filesystem introduces too much additional
+ latency. Too much latency in the HA callout
+ may cause rpc.statd to fail health monitoring.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</refsect2>
* The first line is the mode. Currently supported modes are:
*
* persistent_db
+ * shared_dir
*
- * In this mode, the file contains 2 subsequent lines of text:
+ * In these modes, the file contains 2 subsequent lines of text:
*
* path: directory where files should be created
* ips_file: file containing node's currently assigned public IP addresses
struct {
enum {
CTDB_SC_MODE_PERSISTENT_DB,
+ CTDB_SC_MODE_SHARED_DIR,
} mode;
union {
struct {
{
switch (config.mode) {
case CTDB_SC_MODE_PERSISTENT_DB:
+ case CTDB_SC_MODE_SHARED_DIR:
free(config.path);
config.path = NULL;
free(config.ips_file);
}
if (strcmp(mode, "persistent_db") == 0) {
config.mode = CTDB_SC_MODE_PERSISTENT_DB;
+ } else if (strcmp(mode, "shared_dir") == 0) {
+ config.mode = CTDB_SC_MODE_SHARED_DIR;
} else {
fprintf(stderr,
"%s: unknown mode=%s in %s\n",
switch (config.mode) {
case CTDB_SC_MODE_PERSISTENT_DB:
+ case CTDB_SC_MODE_SHARED_DIR:
status = getline_strip(&config.path, &n, f);
if (!status) {
goto parse_error;
for_each_sip(del_client_persistent_db_line, cip);
}
+static void add_client_shared_dir_line(const char *sip, const char *cip)
+{ /* Create an empty file at the path make_path() builds from sip and cip; exit(1) if it cannot be opened. */
+ char path[PATH_MAX];
+ FILE *f;
+
+ make_path(path, sizeof(path), sip, cip); /* presumably a statd-state@<sip>@<cip> name under the configured path - confirm against make_path() */
+
+ f = fopen(path, "w"); /* "w" creates the file, or truncates an existing one */
+ if (f == NULL) {
+ fprintf(stderr,
+ "%s: unable to open for writing %s\n",
+ progname,
+ path);
+ exit(1);
+ }
+ fclose(f); /* nothing is written to the file; return value of fclose() is not checked */
+}
+
+static void add_client_shared_dir(const char *cip)
+{ /* add-client handler for shared_dir mode: hands add_client_shared_dir_line and cip to for_each_sip() */
+ for_each_sip(add_client_shared_dir_line, cip); /* presumably invokes the callback once per server IP in the ips_file - confirm against for_each_sip() */
+}
+
+static void del_client_shared_dir_line(const char *sip, const char *cip)
+{ /* Remove the file at the path make_path() builds from sip and cip; exit(1) if unlink() fails. */
+ char path[PATH_MAX];
+ int ret;
+
+ make_path(path, sizeof(path), sip, cip); /* presumably the same statd-state@<sip>@<cip> name used by add-client - confirm against make_path() */
+
+ ret = unlink(path);
+ if (ret != 0) { /* NOTE(review): every failure is fatal, including a file that is already gone - confirm this is intended */
+ fprintf(stderr,
+ "%s: unable to remove %s\n",
+ progname,
+ path);
+ exit(1);
+ }
+}
+
+static void del_client_shared_dir(const char *cip)
+{ /* del-client handler for shared_dir mode: hands del_client_shared_dir_line and cip to for_each_sip() */
+ for_each_sip(del_client_shared_dir_line, cip); /* presumably invokes the callback once per server IP in the ips_file - confirm against for_each_sip() */
+}
+
static void usage(void)
{
printf("usage: %s: { add-client | del-client } <client-ip>\n", progname);
case CTDB_SC_MODE_PERSISTENT_DB:
add_client_persistent_db(mon_name);
break;
+ case CTDB_SC_MODE_SHARED_DIR:
+ add_client_shared_dir(mon_name);
+ break;
}
} else if (strcmp(event, "del-client") == 0) {
mon_name = argv[2];
case CTDB_SC_MODE_PERSISTENT_DB:
del_client_persistent_db(mon_name);
break;
+ case CTDB_SC_MODE_SHARED_DIR:
+ del_client_shared_dir(mon_name);
+ break;
}
} else {
usage();
exit 1
}
+load_script_options "service" "60.nfs"
+
############################################################
ctdb_setup_state_dir "service" "nfs"
# shellcheck disable=SC2154
statd_callout_state_dir="${script_state_dir}/statd_callout"
-statd_callout_mode="persistent_db"
-statd_callout_db="ctdb.tdb"
-statd_callout_queue_dir="${statd_callout_state_dir}/queue"
+# Set default value, if necessary
+: "${CTDB_STATD_CALLOUT_SHARED_STORAGE:=persistent_db}" # ':' is a no-op; the expansion assigns the default when unset or empty
+
+statd_callout_mode="${CTDB_STATD_CALLOUT_SHARED_STORAGE%%:*}" # text before the first ':'
+statd_callout_location="${CTDB_STATD_CALLOUT_SHARED_STORAGE#*:}" # text after the first ':'; whole value if there is no ':'
+# No ':' leaves the value unchanged: no location given, so the mode picks the default
+if [ "$statd_callout_location" = "$CTDB_STATD_CALLOUT_SHARED_STORAGE" ]; then
+ statd_callout_location=""
+fi
+
+case "$statd_callout_mode" in
+persistent_db)
+ statd_callout_db="${statd_callout_location:-ctdb.tdb}" # location names the TDB; default ctdb.tdb
+ statd_callout_queue_dir="${statd_callout_state_dir}/queue"
+ ;;
+shared_dir)
+ statd_callout_shared_dir="${statd_callout_location:-statd}" # location names the directory; default statd
+ case "$statd_callout_shared_dir" in
+ /*)
+ : # absolute path: use as is
+ ;;
+ *)
+ if [ -z "$CTDB_NFS_SHARED_STATE_DIR" ]; then
+ die "CTDB_NFS_SHARED_STATE_DIR is not set"
+ fi
+ t="${CTDB_NFS_SHARED_STATE_DIR}/${statd_callout_shared_dir}" # relative path: place under CTDB_NFS_SHARED_STATE_DIR
+ statd_callout_shared_dir="$t"
+ ;;
+ esac
+
+ if [ -n "$CTDB_TEST_MODE" ]; then
+ t="${CTDB_TEST_TMP_DIR}${statd_callout_shared_dir}" # test mode: prefix the path with CTDB_TEST_TMP_DIR
+ statd_callout_shared_dir="$t"
+ fi
+ ;;
+*)
+ mode="$statd_callout_mode"
+ die "error: unknown CTDB_STATD_CALLOUT_SHARED_STORAGE mode ${mode}"
+ ;;
+esac
############################################################
############################################################
+# Use file/key names of the form statd-state@<server-IP>@<client-IP>
+# to track "add-client" and "del-client" events. statd_callout adds
+# and removes files directly in $statd_callout_shared_dir. This may
+# result in performance problems if thousands of clients reclaim locks
+# after failover and the cluster filesystem is unable to handle the
+# load.
+
+startup_shared_dir()
+{ # $1: config file to write. Writes 3 lines: the mode name, the shared directory, the public IPs cache file
+ _config_file="$1"
+
+ create_add_del_client_dir "$statd_callout_shared_dir" # presumably creates the directory if needed - confirm against create_add_del_client_dir
+
+ cat >"$_config_file" <<EOF
+shared_dir
+${statd_callout_shared_dir}
+${CTDB_MY_PUBLIC_IPS_CACHE}
+EOF
+}
+
+update_shared_dir()
+{ # Intentionally a no-op for shared_dir mode
+ :
+}
+
+list_records_shared_dir()
+{ # Print one "<server-IP> <client-IP>" line per statd-state file matching an IP read from $CTDB_MY_PUBLIC_IPS_CACHE
+ while read -r _ip; do
+ ls "${statd_callout_shared_dir}/statd-state@${_ip}@"* # glob matches any client part for this server IP
+ done <"$CTDB_MY_PUBLIC_IPS_CACHE" |
+ while read -r _f; do
+ if [ ! -f "$_f" ]; then # skip anything that is not a regular file
+ continue
+ fi
+ _t="${_f#"${statd_callout_shared_dir}/statd-state@"}" # strip directory and prefix, leaving <server-IP>@<client-IP>
+ _sip="${_t%@*}" # text before the last '@'
+ _cip="${_t#*@}" # text after the first '@'
+ echo "$_sip" "$_cip"
+ done
+}
+
+delete_records_shared_dir()
+{ # Read "<server-IP> <client-IP>" lines from stdin and remove the corresponding statd-state files
+ while read -r _sip _cip; do
+ echo "${statd_callout_shared_dir}/statd-state@${_sip}@${_cip}"
+ done | xargs rm -f # -f: a file that no longer exists is not an error
+}
+
+cleanup_shared_dir()
+{ # Intentionally a no-op for shared_dir mode
+ :
+}
+
+############################################################
+
# Per-mode initialisation
startup()
{
_default="${CTDB_SCRIPT_VARDIR}/statd_callout.conf"
_config_file="${CTDB_STATD_CALLOUT_CONFIG_FILE:-"${_default}"}"
+ mkdir -p "$statd_callout_state_dir"
+
"startup_${statd_callout_mode}" "$_config_file"
}