}
-enum control_state {CTDB_CONTROL_WAIT, CTDB_CONTROL_DONE, CTDB_CONTROL_ERROR, CTDB_CONTROL_TIMEOUT};
-
struct ctdb_client_control_state {
struct ctdb_context *ctdb;
uint32_t reqid;
enum control_state state;
char *errormsg;
struct ctdb_req_control *c;
+
+ /* Optional completion callback for this control.
+ ctdb_control_send() stores the caller's callback and cb_private here.
+ When callback is non-NULL, both the reply path and the timeout path
+ schedule invoke_control_callback() through a timed event created
+ with timeval_zero(), so the callback is run for a completed control
+ as well as for one that timed out.
+
+ The note originally written here said that a callback MUST
+ talloc_free the cb_data passed to it.
+ NOTE(review): invoke_control_callback() allocates cb_data under a
+ temporary talloc context and frees that context right after the
+ callback returns, so that requirement looks stale - confirm before
+ relying on it.
+ */
+ control_callback callback; /* called as callback(cb_data, cb_private); NULL means no callback */
+ void *cb_private; /* caller-supplied pointer, handed to the callback unchanged */
};
+/*
+ Timed-event handler that hands the outcome of an asynchronous control
+ to the callback the caller registered when sending it.
+ It is scheduled both when a reply arrives (state CTDB_CONTROL_DONE)
+ and when the control times out (state CTDB_CONTROL_TIMEOUT).
+
+ ev, te, t - event loop arguments; not used in this function
+ private_data - the struct ctdb_client_control_state of the control
+
+ A struct ctdb_control_cb_data is filled in with the control state, the
+ destination node and the outdata/status/errormsg reported by
+ ctdb_control_recv(), and then callback(cb_data, cb_private) is called.
+ cb_data is a child of a temporary talloc context that is freed as soon
+ as the callback returns, so the callback has to copy (or talloc_steal)
+ anything it wants to keep.
+ If cb_data cannot be allocated the callback is not called at all.
+*/
+static void invoke_control_callback(struct event_context *ev, struct timed_event *te,
+ struct timeval t, void *private_data)
+{
+ struct ctdb_client_control_state *state;
+ struct ctdb_control_cb_data *cb_data;
+ struct ctdb_context *ctdb;
+ control_callback callback;
+ void *cb_private;
+ TALLOC_CTX *tmp_ctx = talloc_new(NULL); /* parent of state and cb_data for the duration of this call */
+ int ret; /* assigned below but intentionally never inspected */
+
+ state = talloc_get_type(private_data, struct ctdb_client_control_state);
+ talloc_steal(tmp_ctx, state); /* reparent state so that freeing tmp_ctx also releases it on the early-exit path */
+
+ ctdb = state->ctdb; /* copy everything needed later: state is gone once ctdb_control_recv() has run */
+ callback = state->callback;
+ cb_private = state->cb_private;
+
+ cb_data = talloc_zero(tmp_ctx, struct ctdb_control_cb_data);
+ if (cb_data == NULL) {
+ talloc_free(tmp_ctx); /* releases state as well; the callback is NOT invoked on this path */
+ CTDB_NO_MEMORY_VOID(ctdb, cb_data); /* NOTE(review): expected to report the failure and return from this function - confirm against the macro definition */
+ }
+
+ cb_data->state = state->state; /* must be read before ctdb_control_recv() frees state */
+ cb_data->vnn = state->c->hdr.destnode;
+
+ ret = ctdb_control_recv(ctdb, state, cb_data,
+ &cb_data->outdata,
+ &cb_data->status,
+ &cb_data->errormsg);
+ /* ret is deliberately not checked: ctdb_control_recv is expected to
+ fail in some cases, for example when the control timed out. The
+ callback learns the outcome from cb_data->state instead.
+
+ state is always talloc_free()'d inside ctdb_control_recv, so it must
+ not be touched from here on
+ */
+
+ callback(cb_data, cb_private); /* the callback's int return value is ignored */
+ talloc_free(tmp_ctx); /* frees cb_data and whatever is still parented to tmp_ctx */
+}
+
/*
called when a CTDB_REPLY_CONTROL packet comes in in the client
talloc_steal(state, c);
state->state = CTDB_CONTROL_DONE;
+
+ /* if we had a callback registered for this control, pull the response
+ and call the callback.
+ */
+ if (state->callback) {
+ event_add_timed(ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
+ }
}
DEBUG(0,("control timed out. reqid:%d opcode:%d dstnode:%d\n", state->reqid, state->c->opcode, state->c->hdr.destnode));
state->state = CTDB_CONTROL_TIMEOUT;
+
+ /* if we had a callback registered for this control, pull the response
+ and call the callback.
+ */
+ if (state->callback) {
+ event_add_timed(state->ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
+ }
}
/* async version of send control request */
uint32_t opcode, uint32_t flags, TDB_DATA data,
TALLOC_CTX *mem_ctx, TDB_DATA *outdata,
struct timeval *timeout,
- char **errormsg)
-
+ char **errormsg,
+ control_callback callback, void *cb_private)
{
struct ctdb_client_control_state *state;
size_t len;
ctdb_socket_connect(ctdb);
}
- state = talloc_zero(ctdb, struct ctdb_client_control_state);
+ state = talloc_zero(mem_ctx, struct ctdb_client_control_state);
CTDB_NO_MEMORY_NULL(ctdb, state);
- state->ctdb = ctdb;
- state->reqid = ctdb_reqid_new(ctdb, state);
- state->state = CTDB_CONTROL_WAIT;
- state->errormsg= NULL;
+ state->ctdb = ctdb;
+ state->reqid = ctdb_reqid_new(ctdb, state);
+ state->state = CTDB_CONTROL_WAIT;
+ state->errormsg = NULL;
+ state->callback = callback;
+ state->cb_private = cb_private;
talloc_set_destructor(state, ctdb_control_destructor);
memcpy(&c->data[0], data.dptr, data.dsize);
}
+ /* timeout */
+ if (timeout && !timeval_is_zero(timeout)) {
+ event_add_timed(ctdb->ev, state, *timeout, control_timeout_func, state);
+ }
+
ret = ctdb_client_queue_pkt(ctdb, &(c->hdr));
if (ret != 0) {
talloc_free(state);
return NULL;
}
- /* timeout */
- if (timeout && !timeval_is_zero(timeout)) {
- event_add_timed(ctdb->ev, state, *timeout, control_timeout_func, state);
- }
-
return state;
}
state = ctdb_control_send(ctdb, destnode, srvid, opcode,
flags, data, mem_ctx, outdata,
- timeout, errormsg);
+ timeout, errormsg,
+ NULL, NULL);
return ctdb_control_recv(ctdb, state, mem_ctx, outdata, status,
errormsg);
}
}
+/*
+ get the recovery mode of a remote node
+
+ Asynchronous send half: issues a CTDB_CONTROL_GET_RECMODE control to
+ destnode through ctdb_control_send() with srvid 0, flags 0 and no
+ payload (tdb_null), and returns whatever ctdb_control_send() returns
+ (the control state, or NULL when the control could not be sent).
+ No completion callback is registered, so the result has to be
+ collected with ctdb_ctrl_getrecmode_recv().
+ */
struct ctdb_client_control_state *
ctdb_ctrl_getrecmode_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
{
return ctdb_control_send(ctdb, destnode, 0,
CTDB_CONTROL_GET_RECMODE, 0, tdb_null,
- mem_ctx, NULL, &timeout, NULL);
+ mem_ctx, NULL, &timeout, NULL,
+ NULL, NULL); /* callback, cb_private: none for this control */
}
int ctdb_ctrl_getrecmode_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmode)
return 0;
}
-/*
- get the recovery mode of a remote node
- */
int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmode)
{
struct ctdb_client_control_state *state;
return 0;
}
+
+
/*
get the recovery master of a remote node
+
+ Asynchronous send half: issues a CTDB_CONTROL_GET_RECMASTER control to
+ destnode through ctdb_control_send() with srvid 0, flags 0 and no
+ payload (tdb_null).
+
+ mem_ctx - talloc parent used by ctdb_control_send() for the state
+ timeout - passed on by address; ctdb_control_send() only arms a
+ timeout when the value is not zero
+ callback - optional; when non-NULL the outcome is delivered by
+ calling callback(cb_data, cb_private)
+ cb_private - opaque pointer handed to the callback
+
+ Returns the control state, or NULL when the control could not be sent.
+ Without a callback the caller collects the answer with
+ ctdb_ctrl_getrecmaster_recv().
*/
-int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmaster)
+struct ctdb_client_control_state *
+ctdb_ctrl_getrecmaster_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
+ struct timeval timeout, uint32_t destnode,
+ control_callback callback, void *cb_private)
+{
+ return ctdb_control_send(ctdb, destnode, 0,
+ CTDB_CONTROL_GET_RECMASTER, 0, tdb_null,
+ mem_ctx, NULL, &timeout, NULL,
+ callback, cb_private);
+}
+/*
+ Receive half of the getrecmaster control: collects the reply for a
+ control started with ctdb_ctrl_getrecmaster_send().
+
+ state - control state returned by the send call; per the note in
+ invoke_control_callback() it is freed inside
+ ctdb_control_recv()
+ recmaster - optional output; when non-NULL it receives the recovery
+ master, which is carried in the status value of the reply
+
+ Returns 0 on success and -1 when ctdb_control_recv() reports a failure.
+ */
+int ctdb_ctrl_getrecmaster_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmaster)
{
int ret;
int32_t res;
- ret = ctdb_control(ctdb, destnode, 0,
- CTDB_CONTROL_GET_RECMASTER, 0, tdb_null,
- NULL, NULL, &res, &timeout, NULL);
+ ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL); /* only the status value is wanted: no outdata, no errormsg */
if (ret != 0) {
- DEBUG(0,(__location__ " ctdb_control for getrecmaster failed\n"));
+ DEBUG(0,(__location__ " ctdb_ctrl_getrecmaster_recv failed\n"));
return -1;
}
- *recmaster = res;
+ if (recmaster) { /* the output pointer is optional */
+ *recmaster = (uint32_t)res;
+ }
return 0;
}
+int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmaster)
+{ /* Synchronous wrapper: sends the getrecmaster control to destnode
+ without a callback and immediately collects the reply.
+ Returns what ctdb_ctrl_getrecmaster_recv() returns: 0 on success
+ with *recmaster filled in (if non-NULL), -1 on failure.
+ NOTE(review): state is NULL when the send fails and is passed to
+ the recv call unchecked - confirm that ctdb_control_recv()
+ tolerates a NULL state.
+ */
+ struct ctdb_client_control_state *state;
+
+ state = ctdb_ctrl_getrecmaster_send(ctdb, mem_ctx, timeout, destnode, NULL, NULL);
+ return ctdb_ctrl_getrecmaster_recv(ctdb, mem_ctx, state, recmaster);
+}
+
+
/*
set the recovery master of a remote node
*/
#define CTDB_BROADCAST_CONNECTED 0xF0000004
+enum control_state {CTDB_CONTROL_WAIT, CTDB_CONTROL_DONE, CTDB_CONTROL_ERROR, CTDB_CONTROL_TIMEOUT}; /* progress of a client control:
+ CTDB_CONTROL_WAIT is set when the control is sent,
+ CTDB_CONTROL_DONE when the reply packet has arrived,
+ CTDB_CONTROL_TIMEOUT when the control timeout fired.
+ CTDB_CONTROL_ERROR is not assigned in the code visible in this
+ change - presumably set on a failure path elsewhere; verify.
+ */
+/*
+ Result record handed to a control_callback when an asynchronous
+ control completes or times out. It is allocated zeroed by
+ invoke_control_callback() and released by it after the callback has
+ returned, so a callback must copy whatever it needs to keep.
+ */
+struct ctdb_control_cb_data {
+ enum control_state state; /* state of the control when the callback was scheduled, e.g. CTDB_CONTROL_DONE or CTDB_CONTROL_TIMEOUT */
+ uint32_t vnn; /* destination node of the control (hdr.destnode of the stored packet) */
+ int32_t status; /* status value filled in by ctdb_control_recv() */
+ TDB_DATA outdata; /* reply data filled in by ctdb_control_recv() */
+ char *errormsg; /* error message filled in by ctdb_control_recv(); stays NULL when none is set */
+};
+/*
+ Signature of a completion callback for an asynchronous control.
+ cb_data describes the outcome of the control, cb_private is the
+ pointer that was registered together with the callback.
+ invoke_control_callback() does not look at the returned int.
+ */
+typedef int (*control_callback)(struct ctdb_control_cb_data *cb_data, void *cb_private);
+
+
struct event_context;
/*
/*
get the recovery master of a remote node
*/
-int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmaster);
+int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmaster);
+
+struct ctdb_client_control_state *ctdb_ctrl_getrecmaster_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, control_callback callback, void *cb_private);
+
+int ctdb_ctrl_getrecmaster_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmaster);
+
+
+
/*
set the recovery master of a remote node
*/
uint32_t opcode, uint32_t flags, TDB_DATA data,
TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status,
struct timeval *timeout, char **errormsg);
+int ctdb_control_recv(struct ctdb_context *ctdb,
+ struct ctdb_client_control_state *state,
+ TALLOC_CTX *mem_ctx,
+ TDB_DATA *outdata, int32_t *status, char **errormsg);
+
+struct ctdb_client_control_state *
+ctdb_control_send(struct ctdb_context *ctdb,
+ uint32_t destnode, uint64_t srvid,
+ uint32_t opcode, uint32_t flags, TDB_DATA data,
+ TALLOC_CTX *mem_ctx, TDB_DATA *outdata,
+ struct timeval *timeout,
+ char **errormsg,
+ control_callback callback, void *cb_private);
{
struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
struct ctdb_ban_info *b = (struct ctdb_ban_info *)data.dptr;
+ TALLOC_CTX *mem_ctx = talloc_new(ctdb);
uint32_t recmaster;
int ret;
if (data.dsize != sizeof(*b)) {
DEBUG(0,("Bad data in ban_handler\n"));
+ talloc_free(mem_ctx);
return;
}
- ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
+ ret = ctdb_ctrl_getrecmaster(ctdb, mem_ctx, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
if (ret != 0) {
DEBUG(0,(__location__ " Failed to find the recmaster\n"));
+ talloc_free(mem_ctx);
return;
}
if (recmaster != ctdb->vnn) {
DEBUG(0,("We are not the recmaster - ignoring ban request\n"));
+ talloc_free(mem_ctx);
return;
}
DEBUG(0,("Node %u has been banned for %u seconds by the administrator\n",
b->vnn, b->ban_time));
ctdb_ban_node(rec, b->vnn, b->ban_time);
+ talloc_free(mem_ctx);
}
/*
TDB_DATA data, void *private_data)
{
struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
+ TALLOC_CTX *mem_ctx = talloc_new(ctdb);
uint32_t vnn;
int ret;
uint32_t recmaster;
if (data.dsize != sizeof(uint32_t)) {
DEBUG(0,("Bad data in unban_handler\n"));
+ talloc_free(mem_ctx);
return;
}
vnn = *(uint32_t *)data.dptr;
- ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
+ ret = ctdb_ctrl_getrecmaster(ctdb, mem_ctx, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
if (ret != 0) {
DEBUG(0,(__location__ " Failed to find the recmaster\n"));
+ talloc_free(mem_ctx);
return;
}
if (recmaster != ctdb->vnn) {
DEBUG(0,("We are not the recmaster - ignoring unban request\n"));
+ talloc_free(mem_ctx);
return;
}
DEBUG(0,("Node %u has been unbanned by the administrator\n", vnn));
ctdb_unban_node(rec, vnn);
+ talloc_free(mem_ctx);
}
nodemap->nodes[i].flags = c->new_flags;
- ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(),
+ ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, CONTROL_TIMEOUT(),
CTDB_CURRENT_NODE, &ctdb->recovery_master);
if (ret == 0) {
}
-enum monitor_result { MONITOR_OK, MONITOR_RECOVERY_NEEDED, MONITOR_FAILED};
+enum monitor_result { MONITOR_OK, MONITOR_RECOVERY_NEEDED, MONITOR_ELECTION_NEEDED, MONITOR_FAILED};
/* verify that all nodes are in recovery mode normal */
-static enum monitor_result verify_recmode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, TALLOC_CTX *mem_ctx)
+static enum monitor_result verify_recmode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
{
struct ctdb_client_control_state **ctrl_states;
uint32_t recmode;
+ TALLOC_CTX *mem_ctx = talloc_new(ctdb);
int j, ret;
ctrl_states = talloc_array(mem_ctx, struct ctdb_client_control_state *,
nodemap->num);
if (!ctrl_states) {
DEBUG(0,(__location__ " Failed to allocate temporary ctrl state array\n"));
+ talloc_free(mem_ctx);
exit(-1);
}
ret = ctdb_ctrl_getrecmode_recv(ctdb, mem_ctx, ctrl_states[j], &recmode);
if (ret != 0) {
DEBUG(0, ("Unable to get recmode from node %u\n", nodemap->nodes[j].vnn));
- talloc_free(ctrl_states);
+ talloc_free(mem_ctx);
return MONITOR_FAILED;
}
+
if (recmode != CTDB_RECOVERY_NORMAL) {
DEBUG(0, (__location__ " Node:%u was in recovery mode. Restart recovery process\n", nodemap->nodes[j].vnn));
- talloc_free(ctrl_states);
+ talloc_free(mem_ctx);
return MONITOR_RECOVERY_NEEDED;
}
}
- talloc_free(ctrl_states);
+ talloc_free(mem_ctx);
return MONITOR_OK;
}
+struct verify_recmaster_data { /* bookkeeping shared between
+ verify_recmaster() and verify_recmaster_callback() for one round
+ of getrecmaster controls
+ */
+ uint32_t count; /* controls sent whose callback has not run yet */
+ uint32_t vnn; /* the node every reply is expected to name as recmaster */
+ enum monitor_result status; /* verdict so far; starts as MONITOR_OK */
+};
+/*
+ control_callback used by verify_recmaster() for every getrecmaster
+ control it sends.
+
+ cb_data - outcome of one control
+ cb_private - the struct verify_recmaster_data shared by all controls
+
+ Decrements the outstanding counter and folds this node's answer into
+ rmdata->status:
+ - control did not finish as CTDB_CONTROL_DONE: MONITOR_FAILED, but
+ only if the status is still MONITOR_OK, so an earlier
+ MONITOR_ELECTION_NEEDED is kept
+ - reply names a recmaster other than rmdata->vnn:
+ MONITOR_ELECTION_NEEDED (this overwrites any earlier status)
+ Always returns 0.
+ */
+static int verify_recmaster_callback(struct ctdb_control_cb_data *cb_data, void *cb_private)
+{
+ struct verify_recmaster_data *rmdata = talloc_get_type(cb_private, struct verify_recmaster_data);
+
+
+ /* one more node has been accounted for, whether it answered or not;
+ this is what lets the wait loop in verify_recmaster() finish */
+ rmdata->count--;
+
+ /* if we failed to get the recmaster from this node, record a failure
+ and let the main loop try again.
+ */
+ if (cb_data->state != CTDB_CONTROL_DONE) {
+ if (rmdata->status == MONITOR_OK) {
+ rmdata->status = MONITOR_FAILED;
+ }
+ return 0;
+ }
+
+ /* if we got a response, the recmaster is carried in the status
+ field. Note that this compares the int32_t status with the
+ uint32_t vnn.
+ */
+ if (cb_data->status != rmdata->vnn) {
+ DEBUG(0,("Node %d does not agree we are the recmaster. Need a new recmaster election\n",cb_data->vnn));
+ rmdata->status = MONITOR_ELECTION_NEEDED;
+ }
+
+ return 0;
+}
+/*
+ Timed-event handler armed by verify_recmaster() as the overall
+ deadline for collecting replies. p points at the caller's uint32_t
+ flag, which is set to 1 so the caller's wait loop stops.
+ ev, te and t are not used.
+ */
+static void verify_recmaster_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
+{
+ uint32_t *timedout = (uint32_t *)p;
+
+ *timedout = 1;
+}
+
+/* verify that all nodes agree that we are the recmaster
+
+ Sends an asynchronous getrecmaster control to every node in nodemap
+ that is not flagged NODE_FLAGS_INACTIVE (the loop does not skip any
+ vnn, so the local node is asked too if it is listed as active) and
+ then runs the event loop until every callback has fired or
+ CONTROL_TIMEOUT() has expired.
+
+ ctdb - ctdb context; also the talloc parent of the temporary context
+ nodemap - nodes to query
+ vnn - the node that every reply is expected to name as recmaster
+
+ Returns
+ MONITOR_OK all queried nodes answered with vnn
+ MONITOR_ELECTION_NEEDED at least one node named another recmaster
+ MONITOR_FAILED a control could not be sent, a control did
+ not complete, or the overall wait timed out
+
+ Everything allocated here (rmdata, the control states, the deadline
+ event) hangs off mem_ctx, which is freed on every return path.
+*/
+static enum monitor_result verify_recmaster(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn)
+{
+ struct verify_recmaster_data *rmdata;
+ TALLOC_CTX *mem_ctx = talloc_new(ctdb);
+ struct ctdb_client_control_state *state;
+ enum monitor_result status;
+ uint32_t timedout; /* set to 1 by verify_recmaster_timeout() */
+ int j;
+
+ rmdata = talloc(mem_ctx, struct verify_recmaster_data);
+ CTDB_NO_MEMORY_FATAL(ctdb, rmdata);
+ rmdata->count = 0;
+ rmdata->vnn = vnn;
+ rmdata->status = MONITOR_OK;
+
+ /* loop over all active nodes and send an async getrecmaster call to
+ each of them. The per-control timeout is timeval_zero(), for which
+ ctdb_control_send() arms no timeout; the only deadline is the
+ single timed event added after this loop */
+ for (j=0; j<nodemap->num; j++) {
+ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
+ continue;
+ }
+ state = ctdb_ctrl_getrecmaster_send(ctdb, mem_ctx,
+ timeval_zero(),
+ nodemap->nodes[j].vnn,
+ verify_recmaster_callback, rmdata);
+ if (state == NULL) {
+ /* we failed to send the control; treat this as an error
+ and let the caller try again on its next iteration.
+ Freeing mem_ctx also releases the controls that were
+ already sent in earlier passes of this loop.
+ */
+ DEBUG(0,("Failed to call ctdb_ctrl_getrecmaster_send during monitoring\n"));
+ talloc_free(mem_ctx);
+ return MONITOR_FAILED;
+ }
+
+ /* one more control whose callback we have to wait for */
+ rmdata->count++;
+ }
+
+
+ /* now wait until every node we expect a response from has replied,
+ or until the maximum number of seconds allowed has passed.
+ The deadline event is created with rmdata as its context and
+ points at the local timedout flag; both are gone before this
+ function returns because mem_ctx is freed below.
+ */
+ timedout = 0;
+ event_add_timed(ctdb->ev, rmdata, CONTROL_TIMEOUT(),
+ verify_recmaster_timeout, &timedout);
+
+ while ( (rmdata->count > 0)
+ && (timedout == 0) ) {
+ event_loop_once(ctdb->ev);
+ }
+ if (timedout) {
+ DEBUG(0,("Timedout while waiting for getrecmaster replies.\n"));
+ rmdata->status = MONITOR_FAILED; /* overrides whatever the callbacks recorded so far */
+ }
+
+
+ status = rmdata->status; /* copy out before rmdata is freed together with mem_ctx */
+ talloc_free(mem_ctx);
+ return status;
+}
+
/*
the main monitoring loop
/* check which node is the recovery master */
- ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), vnn, &recmaster);
+ ret = ctdb_ctrl_getrecmaster(ctdb, mem_ctx, CONTROL_TIMEOUT(), vnn, &recmaster);
if (ret != 0) {
DEBUG(0, (__location__ " Unable to get recmaster from node %u\n", vnn));
goto again;
/* verify that all active nodes agree that we are the recmaster */
- for (j=0; j<nodemap->num; j++) {
- if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
- continue;
- }
- if (nodemap->nodes[j].vnn == vnn) {
- continue;
- }
-
- ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, &recmaster);
- if (ret != 0) {
- DEBUG(0, (__location__ " Unable to get recmaster from node %u\n", vnn));
- goto again;
- }
-
- if (recmaster!=vnn) {
- DEBUG(0, ("Node %u does not agree we are the recmaster. Force reelection\n",
- nodemap->nodes[j].vnn));
- force_election(rec, mem_ctx, vnn, nodemap);
- goto again;
- }
+ switch (verify_recmaster(ctdb, nodemap, vnn)) {
+ case MONITOR_RECOVERY_NEEDED:
+ /* can not happen */
+ goto again;
+ case MONITOR_ELECTION_NEEDED:
+ force_election(rec, mem_ctx, vnn, nodemap);
+ goto again;
+ case MONITOR_OK:
+ break;
+ case MONITOR_FAILED:
+ goto again;
}
/* verify that all active nodes are in normal mode
and not in recovery mode
*/
- /* send a getrecmode call out to every node */
- switch (verify_recmode(ctdb, nodemap, mem_ctx)) {
+ switch (verify_recmode(ctdb, nodemap)) {
case MONITOR_RECOVERY_NEEDED:
do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);
goto again;
case MONITOR_FAILED:
goto again;
+ case MONITOR_ELECTION_NEEDED:
+ /* can not happen */
case MONITOR_OK:
break;
}
}
printf("Recovery mode:%s (%d)\n",recmode==CTDB_RECOVERY_NORMAL?"NORMAL":"RECOVERY",recmode);
- ret = ctdb_ctrl_getrecmaster(ctdb, TIMELIMIT(), options.vnn, &recmaster);
+ ret = ctdb_ctrl_getrecmaster(ctdb, ctdb, TIMELIMIT(), options.vnn, &recmaster);
if (ret != 0) {
DEBUG(0, ("Unable to get recmaster from node %u\n", options.vnn));
return ret;
ban_time = strtoul(argv[0], NULL, 0);
- ret = ctdb_ctrl_getrecmaster(ctdb, TIMELIMIT(), options.vnn, &recmaster);
+ ret = ctdb_ctrl_getrecmaster(ctdb, ctdb, TIMELIMIT(), options.vnn, &recmaster);
if (ret != 0) {
DEBUG(0,("Failed to find the recmaster\n"));
return -1;
uint32_t recmaster;
TDB_DATA data;
- ret = ctdb_ctrl_getrecmaster(ctdb, TIMELIMIT(), options.vnn, &recmaster);
+ ret = ctdb_ctrl_getrecmaster(ctdb, ctdb, TIMELIMIT(), options.vnn, &recmaster);
if (ret != 0) {
DEBUG(0,("Failed to find the recmaster\n"));
return -1;