change the structure used for node flag change messages so that we can see both the old flags and the new flags

author Ronnie Sahlberg <sahlberg@ronnie>

Tue, 21 Aug 2007 07:25:15 +0000 (17:25 +1000)

committer Ronnie Sahlberg <sahlberg@ronnie>

Tue, 21 Aug 2007 07:25:15 +0000 (17:25 +1000)
author Ronnie Sahlberg <sahlberg@ronnie>
Tue, 21 Aug 2007 07:25:15 +0000 (17:25 +1000)
committer Ronnie Sahlberg <sahlberg@ronnie>
Tue, 21 Aug 2007 07:25:15 +0000 (17:25 +1000)
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h

index 01146f3f43ea7c24c4669d37564c3650761fe924..64caeb30ec41c2b97cfd447a019782e0650e3c7d 100644 (file)
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -506,7 +506,8 @@ struct ctdb_control_tcp_vnn {
   */
  struct ctdb_node_flag_change {
         uint32_t vnn;
-       uint32_t flags;
+       uint32_t new_flags;
+       uint32_t old_flags;
  };
  
  /*
diff --git a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c

index f216ca4365d74f092d602887d4cc73b50b7bc9a4..6f09acea17b98f54085f51ff1fabbdff26bcd157 100644 (file)
--- a/ctdb/server/ctdb_daemon.c
+++ b/ctdb/server/ctdb_daemon.c
@@ -51,7 +51,7 @@ static void flag_change_handler(struct ctdb_context *ctdb, uint64_t srvid,
         /* don't get the disconnected flag from the other node */
         ctdb->nodes[c->vnn]->flags = 
                 (ctdb->nodes[c->vnn]->flags&NODE_FLAGS_DISCONNECTED) 
-               | (c->flags & ~NODE_FLAGS_DISCONNECTED);        
+               | (c->new_flags & ~NODE_FLAGS_DISCONNECTED);    
         DEBUG(2,("Node flags for node %u are now 0x%x\n", c->vnn, ctdb->nodes[c->vnn]->flags));
  
         /* make sure we don't hold any IPs when we shouldn't */
diff --git a/ctdb/server/ctdb_monitor.c b/ctdb/server/ctdb_monitor.c

index ec5244703c08b651d68c677b616e531e2d596faa..243961d228f98c1319bd978be978d81b7c5ed499 100644 (file)
--- a/ctdb/server/ctdb_monitor.c
+++ b/ctdb/server/ctdb_monitor.c
@@ -103,6 +103,9 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
                         timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
                         ctdb_check_health, ctdb);
  
+       c.vnn = ctdb->vnn;
+       c.old_flags = node->flags;
+
         if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
                 DEBUG(0,("monitor event failed - disabling node\n"));
                 node->flags |= NODE_FLAGS_UNHEALTHY;
@@ -114,8 +117,7 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
                 return;
         }
  
-       c.vnn = ctdb->vnn;
-       c.flags = node->flags;
+       c.new_flags = node->flags;
  
         data.dptr = (uint8_t *)&c;
         data.dsize = sizeof(c);
@@ -206,7 +208,8 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
  
         /* if we have been banned, go into recovery mode */
         c.vnn = ctdb->vnn;
-       c.flags = node->flags;
+       c.old_flags = old_flags;
+       c.new_flags = node->flags;
  
         data.dptr = (uint8_t *)&c;
         data.dsize = sizeof(c);
diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c

index 5cb985521d7a2781029417c27d08326e80475e80..812214088e7876ddcf97007ecab80c5755e58750 100644 (file)
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@@ -386,7 +386,8 @@ static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node
                 TDB_DATA data;
  
                 c.vnn = nodemap->nodes[i].vnn;
-               c.flags = nodemap->nodes[i].flags;
+               c.old_flags = nodemap->nodes[i].flags;
+               c.new_flags = nodemap->nodes[i].flags;
  
                 data.dptr = (uint8_t *)&c;
                 data.dsize = sizeof(c);
@@ -815,7 +816,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
  
         /* send a message to all clients telling them that the cluster 
            has been reconfigured */
-       ctdb_send_message(ctdb, CTDB_BROADCAST_ALL, CTDB_SRVID_RECONFIGURE, tdb_null);
+       ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_RECONFIGURE, tdb_null);
  
         DEBUG(0, (__location__ " Recovery complete\n"));
  
@@ -1045,6 +1046,7 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
         struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)data.dptr;
         struct ctdb_node_map *nodemap=NULL;
         TALLOC_CTX *tmp_ctx;
+       uint32_t changed_flags;
         int i;
  
         if (data.dsize != sizeof(*c)) {
@@ -1067,20 +1069,22 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
                 return;
         }
  
+       changed_flags = c->old_flags ^ c->new_flags;
+
         /* Dont let messages from remote nodes change the DISCONNECTED flag. 
            This flag is handled locally based on whether the local node
            can communicate with the node or not.
         */
-       c->flags &= ~NODE_FLAGS_DISCONNECTED;
+       c->new_flags &= ~NODE_FLAGS_DISCONNECTED;
         if (nodemap->nodes[i].flags&NODE_FLAGS_DISCONNECTED) {
-               c->flags |= NODE_FLAGS_DISCONNECTED;
+               c->new_flags |= NODE_FLAGS_DISCONNECTED;
         }
  
-       if (nodemap->nodes[i].flags != c->flags) {
-               DEBUG(0,("Node %u has changed flags - now 0x%x\n", c->vnn, c->flags));
+       if (nodemap->nodes[i].flags != c->new_flags) {
+               DEBUG(0,("Node %u has changed flags - now 0x%x  was 0x%x\n", c->vnn, c->new_flags, c->old_flags));
         }
  
-       nodemap->nodes[i].flags = c->flags;
+       nodemap->nodes[i].flags = c->new_flags;
  
         ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), 
                                      CTDB_CURRENT_NODE, &ctdb->recovery_master);
@@ -1094,9 +1098,21 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
             ctdb->recovery_master == ctdb->vnn &&
             ctdb->recovery_mode == CTDB_RECOVERY_NORMAL &&
             ctdb->takeover.enabled) {
-               ret = ctdb_takeover_run(ctdb, nodemap);
-               if (ret != 0) {
-                       DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));
+               /* Only do the takeover run if the perm disabled or unhealthy
+                  flags changed since these will cause an ip failover but not
+                  a recovery.
+                  If the node became disconnected or banned this will also
+                  lead to an ip address failover but that is handled 
+                  during recovery
+               */
+               if (changed_flags & NODE_FLAGS_DISABLED) {
+                       ret = ctdb_takeover_run(ctdb, nodemap);
+                       if (ret != 0) {
+                               DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));
+                       }
+                       /* send a message to all clients telling them that the 
+                          cluster has been reconfigured */
+                       ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_RECONFIGURE, tdb_null);
                 }
         }
author	Ronnie Sahlberg <sahlberg@ronnie>
	Tue, 21 Aug 2007 07:25:15 +0000 (17:25 +1000)
committer	Ronnie Sahlberg <sahlberg@ronnie>
	Tue, 21 Aug 2007 07:25:15 +0000 (17:25 +1000)
ctdb/include/ctdb_private.h		patch \| blob \| blame \| history
ctdb/server/ctdb_daemon.c		patch \| blob \| blame \| history
ctdb/server/ctdb_monitor.c		patch \| blob \| blame \| history
ctdb/server/ctdb_recoverd.c		patch \| blob \| blame \| history