]> git.ipfire.org Git - thirdparty/samba.git/commitdiff
time out pending controls immediately when a node becomes disconnected
author Andrew Tridgell <tridge@samba.org>
Fri, 18 May 2007 13:48:29 +0000 (23:48 +1000)
committer Andrew Tridgell <tridge@samba.org>
Fri, 18 May 2007 13:48:29 +0000 (23:48 +1000)
(This used to be ctdb commit 93c4b16f4efef383ba8db83953019ef4821613e0)

ctdb/common/ctdb.c
ctdb/common/ctdb_daemon.c
ctdb/common/ctdb_monitor.c
ctdb/include/ctdb_private.h

index 5471463105e8b4a434d15cd6bff788d68bdd58d8..230f3285e55e5c8fa3a511b9c7c09754b593db01 100644 (file)
@@ -379,6 +379,7 @@ static void ctdb_node_dead(struct ctdb_node *node)
        node->flags &= ~NODE_FLAGS_CONNECTED;
        DEBUG(1,("%s: node %s is dead: %d connected\n", 
                 node->ctdb->name, node->name, node->ctdb->num_connected));
+       ctdb_daemon_cancel_controls(node->ctdb, node);
 }
 
 /*
index d9abe2bce045d07ca67a52d2fac70681cc07d7fc..c0f8d422e8eee5d94bc45040ac5dcf3230e505ef 100644 (file)
@@ -836,16 +836,18 @@ void ctdb_request_finished(struct ctdb_context *ctdb, struct ctdb_req_header *hd
 
 
 struct daemon_control_state { /* state kept for a client control until daemon_control_callback() frees it */
+       struct daemon_control_state *next, *prev; /* links for the node->pending_controls list */
        struct ctdb_client *client; /* client passed to daemon_request_control_from_client() */
        struct ctdb_req_control *c; /* the request; talloc_steal'd onto this state */
        uint32_t reqid; /* copy of c->hdr.reqid */
+       struct ctdb_node *node; /* destination node, or NULL when destnode fails ctdb_validate_vnn() */
 };
 
 /*
   callback when a control reply comes in
  */
 static void daemon_control_callback(struct ctdb_context *ctdb,
-                                   uint32_t status, TDB_DATA data, 
+                                   int32_t status, TDB_DATA data, 
                                    const char *errormsg,
                                    void *private_data)
 {
@@ -879,6 +881,30 @@ static void daemon_control_callback(struct ctdb_context *ctdb,
        talloc_free(state);
 }
 
+/* fail all pending controls to a disconnected node: each one is unlinked
+   from node->pending_controls and completed through
+   daemon_control_callback() with status -1 and "node is disconnected" */
+void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node)
+{
+       struct daemon_control_state *state;
+       while ((state = node->pending_controls)) {
+               DLIST_REMOVE(node->pending_controls, state);
+               daemon_control_callback(ctdb, -1, tdb_null,
+                                       "node is disconnected", state);
+       }
+}
+
+/* talloc destructor for a daemon_control_state: unlink it from its node's
+   pending_controls list, if it was queued on one (state->node != NULL);
+   always returns 0 */
+static int daemon_control_destructor(struct daemon_control_state *state)
+{
+       if (state->node) {
+               DLIST_REMOVE(state->node->pending_controls, state);
+       }
+       return 0;
+}
+
 /*
   this is called when the ctdb daemon received a ctdb request control
   from a local client over the unix domain socket
@@ -900,6 +926,14 @@ static void daemon_request_control_from_client(struct ctdb_client *client,
        state->client = client;
        state->c = talloc_steal(state, c);
        state->reqid = c->hdr.reqid;
+       if (ctdb_validate_vnn(client->ctdb, c->hdr.destnode)) {
+               state->node = client->ctdb->nodes[c->hdr.destnode];
+               DLIST_ADD(state->node->pending_controls, state);
+       } else {
+               state->node = NULL;
+       }
+
+       talloc_set_destructor(state, daemon_control_destructor);
        
        data.dptr = &c->data[0];
        data.dsize = c->datalen;
@@ -912,6 +946,10 @@ static void daemon_request_control_from_client(struct ctdb_client *client,
                DEBUG(0,(__location__ " Failed to send control to remote node %u\n",
                         c->hdr.destnode));
        }
+
+       if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
+               talloc_free(state);
+       }
 }
 
 /*
index ff2046ed8add61dccf9d7800931b996b4d1807c3..255ea5ee30acdb886d7e58d39332d125577b33a7 100644 (file)
@@ -58,6 +58,7 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve
                if (node->dead_count >= CTDB_MONITORING_DEAD_COUNT) {
                        DEBUG(0,("Node %u is dead - marking as not connected\n", node->vnn));
                        node->flags &= ~NODE_FLAGS_CONNECTED;
+                       ctdb_daemon_cancel_controls(ctdb, node);
                        /* maybe tell the transport layer to kill the
                           sockets as well?
                        */
index 821a99efd42f40a8cf985faae9adc96ae95232f6..57901ed6a99ac4035850c83755e29c8e6379527b 100644 (file)
@@ -74,7 +74,7 @@ typedef void (*ctdb_queue_cb_fn_t)(uint8_t *data, size_t length,
 
 /* used for callbacks in ctdb_control requests */
 typedef void (*ctdb_control_callback_fn_t)(struct ctdb_context *,
-                                          uint32_t status, TDB_DATA data, 
+                                          int32_t status, TDB_DATA data, 
                                           const char *errormsg,
                                           void *private_data);
 
@@ -93,6 +93,10 @@ struct ctdb_node {
        /* used by the dead node monitoring */
        uint32_t dead_count;
        uint32_t rx_cnt;
+
+       /* a list of controls pending to this node, so we can time them out quickly
+          if the node becomes disconnected */
+       struct daemon_control_state *pending_controls;
 };
 
 /*
@@ -823,4 +827,6 @@ uint32_t ctdb_get_num_connected_nodes(struct ctdb_context *ctdb);
 int ctdb_start_monitoring(struct ctdb_context *ctdb);
 void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
 
+void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node);
+
 #endif