git.ipfire.org Git - thirdparty/samba.git/commitdiff
ctdb-config: Add configuration option [cluster] leader timeout
author Martin Schwenke <martin@meltin.net>
Sat, 15 Jan 2022 02:02:02 +0000 (13:02 +1100)
committer Martin Schwenke <martins@samba.org>
Mon, 17 Jan 2022 10:21:33 +0000 (10:21 +0000)
Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
ctdb/cluster/cluster_conf.c
ctdb/cluster/cluster_conf.h
ctdb/doc/ctdb.conf.5.xml
ctdb/server/ctdb_config.c
ctdb/server/ctdb_config.h
ctdb/server/ctdb_recoverd.c
ctdb/tests/UNIT/cunit/config_test_001.sh
ctdb/tests/UNIT/cunit/config_test_004.sh

index c2fd3e1fe7873ebe2d8901cb35b571f0b39f9dfc..bdd64ba112f25c1b243ee9c999bc12d4284308e6 100644 (file)
@@ -131,6 +131,20 @@ static bool validate_recovery_lock(const char *key,
        return status;
 }
 
+static bool validate_leader_timeout(const char *key, /* not read in this body */
+                                   int old_timeout, /* not read in this body */
+                                   int new_timeout, /* candidate value being checked */
+                                   enum conf_update_mode mode) /* not read in this body */
+{ /* Validator for "leader timeout": accept only values greater than zero. */
+       if (new_timeout <= 0) { /* zero or negative is rejected */
+               D_ERR("Invalid value for [cluster] -> leader timeout = %d\n",
+                     new_timeout);
+               return false; /* report the value as invalid to the caller */
+       }
+
+       return true; /* any positive value is accepted */
+}
+
 void cluster_conf_init(struct conf_context *conf)
 {
        conf_define_section(conf, CLUSTER_CONF_SECTION, NULL);
@@ -155,6 +169,11 @@ void cluster_conf_init(struct conf_context *conf)
                           CLUSTER_CONF_RECOVERY_LOCK,
                           NULL,
                           validate_recovery_lock);
+       conf_define_integer(conf,
+                           CLUSTER_CONF_SECTION,
+                           CLUSTER_CONF_LEADER_TIMEOUT,
+                           5,
+                           validate_leader_timeout);
        conf_define_boolean(conf,
                            CLUSTER_CONF_SECTION,
                            CLUSTER_CONF_LEADER_CAPABILITY,
index 32334f1a5b4fa0f031087d7a58905e7beccc9c85..38c378fd57185d7d2e3b1e5019b32065bd3626a3 100644 (file)
@@ -28,6 +28,7 @@
 #define CLUSTER_CONF_NODE_ADDRESS    "node address"
 #define CLUSTER_CONF_CLUSTER_LOCK    "cluster lock"
 #define CLUSTER_CONF_RECOVERY_LOCK   "recovery lock"
+#define CLUSTER_CONF_LEADER_TIMEOUT  "leader timeout"
 #define CLUSTER_CONF_LEADER_CAPABILITY "leader capability"
 
 void cluster_conf_init(struct conf_context *conf);
index ae65f8fae4bc6e976967043b42004b5cb3f65201..87a7ea594ea620fed3e5120fb40d17f97ed137d4 100644 (file)
        </listitem>
       </varlistentry>
 
+      <varlistentry>
+       <term>leader timeout = <parameter>SECONDS</parameter></term>
+       <listitem>
+         <para>
+           Number of SECONDS without a leader broadcast before a node
+           triggers an election.
+         </para>
+         <para>
+           Default: <literal>5</literal>
+         </para>
+       </listitem>
+      </varlistentry>
+
       <varlistentry>
        <term>node address = <parameter>IPADDR</parameter></term>
        <listitem>
index 5eabf36501c5fb188dd77cbe2efcc9a870de858d..72830278c4299fa0b95374792b28f7da99a6cbfa 100644 (file)
@@ -57,6 +57,10 @@ static void setup_config_pointers(struct conf_context *conf)
                                   CLUSTER_CONF_SECTION,
                                   CLUSTER_CONF_RECOVERY_LOCK,
                                   &ctdb_config.recovery_lock);
+       conf_assign_integer_pointer(conf,
+                                   CLUSTER_CONF_SECTION,
+                                   CLUSTER_CONF_LEADER_TIMEOUT,
+                                   &ctdb_config.leader_timeout);
        conf_assign_boolean_pointer(conf,
                                    CLUSTER_CONF_SECTION,
                                    CLUSTER_CONF_LEADER_CAPABILITY,
index 1749038617fc6bb8168bd106155635c1d797bc6f..7ccda7d5d53d5b0be7c5c98c8ab393d82d3c35bf 100644 (file)
@@ -28,6 +28,7 @@ struct ctdb_config {
        const char *node_address;
        const char *cluster_lock;
        const char *recovery_lock;
+       int leader_timeout;
        bool leader_capability;
 
        /* Database */
index c2a48a07b4ef00f16727368939935fe78ffd436a..cc239959c56826340e9cce3c5548c848d2c9d875 100644 (file)
@@ -48,8 +48,6 @@
 
 #include "ctdb_cluster_mutex.h"
 
-#define LEADER_BROADCAST_TIMEOUT 5
-
 /* List of SRVID requests that need to be processed */
 struct srvid_list {
        struct srvid_list *next, *prev;
@@ -253,7 +251,6 @@ struct ctdb_recoverd {
        uint32_t leader;
        struct tevent_timer *leader_broadcast_te;
        struct tevent_timer *leader_broadcast_timeout_te;
-       unsigned int leader_broadcast_timeout;
        uint32_t pnn;
        uint32_t last_culprit_node;
        struct ctdb_node_map_old *nodemap;
@@ -2006,7 +2003,7 @@ static int leader_broadcast_timeout_start(struct ctdb_recoverd *rec)
                tevent_add_timer(
                        ctdb->ev,
                        rec,
-                       timeval_current_ofs(rec->leader_broadcast_timeout, 0),
+                       timeval_current_ofs(ctdb_config.leader_timeout, 0),
                        leader_broadcast_timeout_handler,
                        rec);
        if (rec->leader_broadcast_timeout_te == NULL) {
@@ -2979,7 +2976,6 @@ static void monitor_cluster(struct ctdb_context *ctdb)
        rec->leader = CTDB_UNKNOWN_PNN;
        rec->pnn = ctdb_get_pnn(ctdb);
        rec->cluster_lock_handle = NULL;
-       rec->leader_broadcast_timeout = LEADER_BROADCAST_TIMEOUT;
        rec->helper_pid = -1;
 
        rec->takeover_run = ctdb_op_init(rec, "takeover runs");
index 1f674a62b32a3dce4067291039742077363c5456..5dd4581996864f9d194098a1785c49de715fcb93 100755 (executable)
@@ -36,6 +36,7 @@ ok <<EOF
        # node address = 
        # cluster lock = 
        # recovery lock = 
+       # leader timeout = 5
        # leader capability = true
 [database]
        # volatile database directory = ${database_volatile_dbdir}
index bcfcc80e0ad20bab9e751ad2095ee5e2f81d7fc3..ebbc05b63292ab84247cde1e3fcfefa93c43f3c6 100755 (executable)
@@ -31,6 +31,11 @@ ok <<EOF
 EOF
 unit_test ctdb-config get "cluster" "cluster lock"
 
+ok <<EOF
+5
+EOF
+unit_test ctdb-config get "cluster" "leader timeout"
+
 ok <<EOF
 true
 EOF
@@ -97,6 +102,38 @@ Configuration option [cluster] -> recovery lock is deprecated
 EOF
 unit_test ctdb-config -d WARNING validate
 
+cat > "$conffile" <<EOF
+[cluster]
+    leader timeout = 10
+EOF
+
+required_result 0 <<EOF
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[cluster]
+    leader timeout = 0
+EOF
+
+required_result 22 <<EOF
+Invalid value for [cluster] -> leader timeout = 0
+conf: validation for option "leader timeout" failed
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
+cat > "$conffile" <<EOF
+[cluster]
+    leader timeout = -5
+EOF
+
+required_result 22 <<EOF
+conf: invalid value [cluster] -> "leader timeout" = "-5"
+Failed to load config file $conffile
+EOF
+unit_test ctdb-config validate
+
 cat > "$conffile" <<EOF
 [cluster]
     leader capability = false