From: Ronnie Sahlberg Date: Sun, 6 May 2007 18:41:12 +0000 (+1000) Subject: add a test in the function that checks whether the cluster needs X-Git-Tag: tevent-0.9.20~348^2~2768^2~4 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=97bc457321d69cf58d2b1b049ef12fa14ec24e02;p=thirdparty%2Fsamba.git Add a test, in the function that checks whether the cluster needs recovery, that all active nodes are in normal mode. If we discover that some node is still in recovery mode, it may indicate that a previous recovery ended prematurely, and thus we should start a new recovery. (This used to be ctdb commit c15517872e6c98c8c425a8d47d2b348ecb0620b0) --- diff --git a/ctdb/direct/recoverd.c b/ctdb/direct/recoverd.c index afc20f517e5..2a82b39760f 100644 --- a/ctdb/direct/recoverd.c +++ b/ctdb/direct/recoverd.c @@ -422,9 +422,12 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev, return 0; } -void recoverd(struct ctdb_context *ctdb, struct event_context *ev) + + + +void monitor_cluster(struct ctdb_context *ctdb, struct event_context *ev) { - uint32_t vnn, num_active; + uint32_t vnn, num_active, recmode; TALLOC_CTX *mem_ctx=NULL; struct ctdb_node_map *nodemap=NULL; struct ctdb_node_map *remote_nodemap=NULL; @@ -455,6 +458,15 @@ again: /* get our vnn number */ vnn = ctdb_get_vnn(ctdb); + + /* get the vnnmap */ + ret = ctdb_ctrl_getvnnmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &vnnmap); + if (ret != 0) { + DEBUG(0, (__location__ "Unable to get vnnmap from node %u\n", vnn)); + goto again; + } + + /* get number of nodes */ ret = ctdb_ctrl_getnodemap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &nodemap); if (ret != 0) { @@ -462,6 +474,7 @@ again: goto again; } + /* count how many active nodes there are */ num_active = 0; for (i=0; i < nodemap->num; i++) { @@ -471,6 +484,27 @@ again: } + /* verify that all active nodes are in normal mode + and not in recovery mode + */ + for (j=0; j < nodemap->num; j++) { + if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) 
{ + continue; + } + + ret = ctdb_ctrl_getrecmode(ctdb, timeval_current_ofs(1, 0), vnn, &recmode); + if (ret != 0) { + DEBUG(0, ("Unable to get recmode from node %u\n", vnn)); + goto again; + } + if (recmode!=CTDB_RECOVERY_NORMAL) { + DEBUG(0, (__location__ "Node:%d was in recovery mode. Restart recovery process\n", nodemap->nodes[j].vnn)); + do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap); + goto again; + } + } + + /* get the nodemap for all active remote nodes and verify they are the same as for this node */ @@ -511,12 +545,6 @@ again: } - /* get the vnnmap */ - ret = ctdb_ctrl_getvnnmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &vnnmap); - if (ret != 0) { - DEBUG(0, (__location__ "Unable to get vnnmap from node %u\n", vnn)); - goto again; - } /* there better be the same number of lmasters in the vnn map as there are active nodes or well have to do a recovery @@ -649,7 +677,7 @@ int main(int argc, const char *argv[]) } - recoverd(ctdb, ev); + monitor_cluster(ctdb, ev); return ret; }