From: Ronnie Sahlberg Date: Thu, 23 Aug 2007 03:48:39 +0000 (+1000) Subject: break checking that the recoverymode on all nodes are ok out into its X-Git-Tag: tevent-0.9.20~348^2~2435^2~11 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4c13bf0c5f56e3309a6fba3a59912ab48a703944;p=thirdparty%2Fsamba.git break checking that the recoverymode on all nodes are ok out into its own function (This used to be ctdb commit 813cf9a252af96da24122b80f24aabeed2911939) --- diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 1b1b056cd84..df46f2fa3f1 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -1138,13 +1138,65 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid, } +enum monitor_result { MONITOR_OK, MONITOR_RECOVERY_NEEDED, MONITOR_FAILED}; + + +/* verify that all nodes are in recovery mode normal */ +static enum monitor_result verify_recmode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, TALLOC_CTX *mem_ctx) +{ + struct ctdb_client_control_state **ctrl_states; + uint32_t recmode; + int j, ret; + + ctrl_states = talloc_array(mem_ctx, struct ctdb_client_control_state *, + nodemap->num); + if (!ctrl_states) { + DEBUG(0,(__location__ " Failed to allocate temporary ctrl state array\n")); + exit(-1); + } + + + /* loop over all active nodes and send an async getrecmode call to + them*/ + for (j=0; j<nodemap->num; j++) { + if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { + ctrl_states[j] = NULL; + continue; + } + ctrl_states[j] = ctdb_ctrl_getrecmode_send(ctdb, mem_ctx, + CONTROL_TIMEOUT(), + nodemap->nodes[j].vnn); + } + + /* wait for the responses to come back and check that all is ok */ + for (j=0; j<nodemap->num; j++) { + if (ctrl_states[j] == NULL) { + continue; + } + ret = ctdb_ctrl_getrecmode_recv(ctdb, mem_ctx, ctrl_states[j], &recmode); + if (ret != 0) { + DEBUG(0, ("Unable to get recmode from node %u\n", nodemap->nodes[j].vnn)); + talloc_free(ctrl_states); + return MONITOR_FAILED; + } + if 
(recmode != CTDB_RECOVERY_NORMAL) { + DEBUG(0, (__location__ " Node:%u was in recovery mode. Restart recovery process\n", nodemap->nodes[j].vnn)); + talloc_free(ctrl_states); + return MONITOR_RECOVERY_NEEDED; + } + } + + talloc_free(ctrl_states); + return MONITOR_OK; +} + /* the main monitoring loop */ static void monitor_cluster(struct ctdb_context *ctdb) { - uint32_t vnn, num_active, recmode, recmaster; + uint32_t vnn, num_active, recmaster; TALLOC_CTX *mem_ctx=NULL; struct ctdb_node_map *nodemap=NULL; struct ctdb_node_map *remote_nodemap=NULL; @@ -1302,25 +1354,19 @@ again: /* verify that all active nodes are in normal mode and not in recovery mode */ - for (j=0; j<nodemap->num; j++) { - if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { - continue; - } - - ret = ctdb_ctrl_getrecmode(ctdb, mem_ctx, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, &recmode); - if (ret != 0) { - DEBUG(0, ("Unable to get recmode from node %u\n", vnn)); - goto again; - } - if (recmode != CTDB_RECOVERY_NORMAL) { - DEBUG(0, (__location__ " Node:%u was in recovery mode. Restart recovery process\n", - nodemap->nodes[j].vnn)); - do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn); - goto again; - } + /* send a getrecmode call out to every node */ + switch (verify_recmode(ctdb, nodemap, mem_ctx)) { + case MONITOR_RECOVERY_NEEDED: + do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn); + goto again; + case MONITOR_FAILED: + goto again; + case MONITOR_OK: + break; } + /* get the nodemap for all active remote nodes and verify they are the same as for this node */