ctdb-config: Add configuration option [cluster] leader timeout

author Martin Schwenke <martin@meltin.net>

Sat, 15 Jan 2022 02:02:02 +0000 (13:02 +1100)

committer Martin Schwenke <martins@samba.org>

Mon, 17 Jan 2022 10:21:33 +0000 (10:21 +0000)
author Martin Schwenke <martin@meltin.net>
Sat, 15 Jan 2022 02:02:02 +0000 (13:02 +1100)
committer Martin Schwenke <martins@samba.org>
Mon, 17 Jan 2022 10:21:33 +0000 (10:21 +0000)
diff --git a/ctdb/cluster/cluster_conf.c b/ctdb/cluster/cluster_conf.c

index c2fd3e1fe7873ebe2d8901cb35b571f0b39f9dfc..bdd64ba112f25c1b243ee9c999bc12d4284308e6 100644 (file)
--- a/ctdb/cluster/cluster_conf.c
+++ b/ctdb/cluster/cluster_conf.c
@@ -131,6 +131,20 @@ static bool validate_recovery_lock(const char *key,
         return status;
  }
  
+static bool validate_leader_timeout(const char *key, /* not read by this validator */
+                                   int old_timeout, /* not read by this validator */
+                                   int new_timeout, /* candidate value being checked */
+                                   enum conf_update_mode mode) /* not read by this validator */
+{ /* Validator for [cluster] -> leader timeout: accepts only new_timeout > 0 */
+       if (new_timeout <= 0) { /* zero and negative timeouts are rejected */
+               D_ERR("Invalid value for [cluster] -> leader timeout = %d\n",
+                     new_timeout);
+               return false; /* rejection; the offending value was logged via D_ERR above */
+       }
+
+       return true; /* new_timeout is strictly positive */
+}
+
  void cluster_conf_init(struct conf_context *conf)
  {
         conf_define_section(conf, CLUSTER_CONF_SECTION, NULL);
@@ -155,6 +169,11 @@ void cluster_conf_init(struct conf_context *conf)
                            CLUSTER_CONF_RECOVERY_LOCK,
                            NULL,
                            validate_recovery_lock);
+       conf_define_integer(conf,
+                           CLUSTER_CONF_SECTION,
+                           CLUSTER_CONF_LEADER_TIMEOUT,
+                           5,
+                           validate_leader_timeout);
         conf_define_boolean(conf,
                             CLUSTER_CONF_SECTION,
                             CLUSTER_CONF_LEADER_CAPABILITY,
diff --git a/ctdb/cluster/cluster_conf.h b/ctdb/cluster/cluster_conf.h

index 32334f1a5b4fa0f031087d7a58905e7beccc9c85..38c378fd57185d7d2e3b1e5019b32065bd3626a3 100644 (file)
--- a/ctdb/cluster/cluster_conf.h
+++ b/ctdb/cluster/cluster_conf.h
@@ -28,6 +28,7 @@
  #define CLUSTER_CONF_NODE_ADDRESS    "node address"
  #define CLUSTER_CONF_CLUSTER_LOCK    "cluster lock"
  #define CLUSTER_CONF_RECOVERY_LOCK   "recovery lock"
+#define CLUSTER_CONF_LEADER_TIMEOUT  "leader timeout"
  #define CLUSTER_CONF_LEADER_CAPABILITY "leader capability"
  
  void cluster_conf_init(struct conf_context *conf);
diff --git a/ctdb/doc/ctdb.conf.5.xml b/ctdb/doc/ctdb.conf.5.xml

index ae65f8fae4bc6e976967043b42004b5cb3f65201..87a7ea594ea620fed3e5120fb40d17f97ed137d4 100644 (file)
--- a/ctdb/doc/ctdb.conf.5.xml
+++ b/ctdb/doc/ctdb.conf.5.xml
@@ -218,6 +218,19 @@
         </listitem>
        </varlistentry>
  
+      <varlistentry>
+       <term>leader timeout = <parameter>SECONDS</parameter></term>
+       <listitem>
+         <para>
+           Number of SECONDS without a leader broadcast before a node
+           triggers an election.
+         </para>
+         <para>
+           Default: <literal>5</literal>
+         </para>
+       </listitem>
+      </varlistentry>
+
        <varlistentry>
         <term>node address = <parameter>IPADDR</parameter></term>
         <listitem>
diff --git a/ctdb/server/ctdb_config.c b/ctdb/server/ctdb_config.c

index 5eabf36501c5fb188dd77cbe2efcc9a870de858d..72830278c4299fa0b95374792b28f7da99a6cbfa 100644 (file)
--- a/ctdb/server/ctdb_config.c
+++ b/ctdb/server/ctdb_config.c
@@ -57,6 +57,10 @@ static void setup_config_pointers(struct conf_context *conf)
                                    CLUSTER_CONF_SECTION,
                                    CLUSTER_CONF_RECOVERY_LOCK,
                                    &ctdb_config.recovery_lock);
+       conf_assign_integer_pointer(conf,
+                                   CLUSTER_CONF_SECTION,
+                                   CLUSTER_CONF_LEADER_TIMEOUT,
+                                   &ctdb_config.leader_timeout);
         conf_assign_boolean_pointer(conf,
                                     CLUSTER_CONF_SECTION,
                                     CLUSTER_CONF_LEADER_CAPABILITY,
diff --git a/ctdb/server/ctdb_config.h b/ctdb/server/ctdb_config.h

index 1749038617fc6bb8168bd106155635c1d797bc6f..7ccda7d5d53d5b0be7c5c98c8ab393d82d3c35bf 100644 (file)
--- a/ctdb/server/ctdb_config.h
+++ b/ctdb/server/ctdb_config.h
@@ -28,6 +28,7 @@ struct ctdb_config {
         const char *node_address;
         const char *cluster_lock;
         const char *recovery_lock;
+       int leader_timeout;
         bool leader_capability;
  
         /* Database */
diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c

index c2a48a07b4ef00f16727368939935fe78ffd436a..cc239959c56826340e9cce3c5548c848d2c9d875 100644 (file)
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@@ -48,8 +48,6 @@
  
  #include "ctdb_cluster_mutex.h"
  
-#define LEADER_BROADCAST_TIMEOUT 5
-
  /* List of SRVID requests that need to be processed */
  struct srvid_list {
         struct srvid_list *next, *prev;
@@ -253,7 +251,6 @@ struct ctdb_recoverd {
         uint32_t leader;
         struct tevent_timer *leader_broadcast_te;
         struct tevent_timer *leader_broadcast_timeout_te;
-       unsigned int leader_broadcast_timeout;
         uint32_t pnn;
         uint32_t last_culprit_node;
         struct ctdb_node_map_old *nodemap;
@@ -2006,7 +2003,7 @@ static int leader_broadcast_timeout_start(struct ctdb_recoverd *rec)
                 tevent_add_timer(
                         ctdb->ev,
                         rec,
-                       timeval_current_ofs(rec->leader_broadcast_timeout, 0),
+                       timeval_current_ofs(ctdb_config.leader_timeout, 0),
                         leader_broadcast_timeout_handler,
                         rec);
         if (rec->leader_broadcast_timeout_te == NULL) {
@@ -2979,7 +2976,6 @@ static void monitor_cluster(struct ctdb_context *ctdb)
         rec->leader = CTDB_UNKNOWN_PNN;
         rec->pnn = ctdb_get_pnn(ctdb);
         rec->cluster_lock_handle = NULL;
-       rec->leader_broadcast_timeout = LEADER_BROADCAST_TIMEOUT;
         rec->helper_pid = -1;
  
         rec->takeover_run = ctdb_op_init(rec, "takeover runs");
diff --git a/ctdb/tests/UNIT/cunit/config_test_001.sh b/ctdb/tests/UNIT/cunit/config_test_001.sh

index 1f674a62b32a3dce4067291039742077363c5456..5dd4581996864f9d194098a1785c49de715fcb93 100755 (executable)
--- a/ctdb/tests/UNIT/cunit/config_test_001.sh
+++ b/ctdb/tests/UNIT/cunit/config_test_001.sh
@@ -36,6 +36,7 @@ ok <<EOF
         # node address = 
         # cluster lock = 
         # recovery lock = 
+       # leader timeout = 5
         # leader capability = true
  [database]
         # volatile database directory = ${database_volatile_dbdir}
diff --git a/ctdb/tests/UNIT/cunit/config_test_004.sh b/ctdb/tests/UNIT/cunit/config_test_004.sh

index bcfcc80e0ad20bab9e751ad2095ee5e2f81d7fc3..ebbc05b63292ab84247cde1e3fcfefa93c43f3c6 100755 (executable)
--- a/ctdb/tests/UNIT/cunit/config_test_004.sh
+++ b/ctdb/tests/UNIT/cunit/config_test_004.sh
@@ -31,6 +31,11 @@ ok <<EOF
  EOF
  unit_test ctdb-config get "cluster" "cluster lock"
  
+ok <<EOF
+5
+EOF
+unit_test ctdb-config get "cluster" "leader timeout"
+
  ok <<EOF
  true
  EOF
@@ -97,6 +102,38 @@ Configuration option [cluster] -> recovery lock is deprecated
  EOF
  unit_test ctdb-config -d WARNING validate
  
+cat > "$conffile" <<EOF
+[cluster]
+    leader timeout = 10
+EOF
+
+required_result 0 <<EOF
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[cluster]
+    leader timeout = 0
+EOF
+
+required_result 22 <<EOF
+Invalid value for [cluster] -> leader timeout = 0
+conf: validation for option "leader timeout" failed
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[cluster]
+    leader timeout = -5
+EOF
+
+required_result 22 <<EOF
+conf: invalid value [cluster] -> "leader timeout" = "-5"
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
  cat > "$conffile" <<EOF
  [cluster]
      leader capability = false
author	Martin Schwenke <martin@meltin.net>
	Sat, 15 Jan 2022 02:02:02 +0000 (13:02 +1100)
committer	Martin Schwenke <martins@samba.org>
	Mon, 17 Jan 2022 10:21:33 +0000 (10:21 +0000)
ctdb/cluster/cluster_conf.c		patch \| blob \| blame \| history
ctdb/cluster/cluster_conf.h		patch \| blob \| blame \| history
ctdb/doc/ctdb.conf.5.xml		patch \| blob \| blame \| history
ctdb/server/ctdb_config.c		patch \| blob \| blame \| history
ctdb/server/ctdb_config.h		patch \| blob \| blame \| history
ctdb/server/ctdb_recoverd.c		patch \| blob \| blame \| history
ctdb/tests/UNIT/cunit/config_test_001.sh		patch \| blob \| blame \| history
ctdb/tests/UNIT/cunit/config_test_004.sh		patch \| blob \| blame \| history