From 6052078b53cd996bf82ca032ee944de70cbf2628 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 14 Sep 2007 10:16:36 +1000 Subject: [PATCH] let each node verify that they have a correct assignment of public ip addresses (i.e. they hold those they should hold and they don't hold any of those they shouldn't hold) if an inconsistency is found, mark the local node as recovery mode active and wait for the recovery master to trigger a full blown recovery (This used to be ctdb commit 55a5bfc8244c5b9cdda3f11992f384f00566b5dc) --- ctdb/server/ctdb_recoverd.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index c39cfda68d1..b299c7a058d 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -21,6 +21,7 @@ #include "lib/events/events.h" #include "system/filesys.h" #include "system/time.h" +#include "system/network.h" #include "popt.h" #include "cmdline.h" #include "../include/ctdb.h" @@ -1418,6 +1419,7 @@ static void monitor_cluster(struct ctdb_context *ctdb) struct ctdb_vnn_map *remote_vnnmap=NULL; int i, j, ret; struct ctdb_recoverd *rec; + struct ctdb_all_public_ips *ips; rec = talloc_zero(ctdb, struct ctdb_recoverd); CTDB_NO_MEMORY_FATAL(ctdb, rec); @@ -1528,7 +1530,34 @@ again: force_election(rec, mem_ctx, pnn, nodemap); goto again; } - + + /* verify that the public ip address allocation is consistent */ + if (ctdb->vnn != NULL) { + ret = ctdb_ctrl_get_public_ips(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, &ips); + if (ret != 0) { + DEBUG(0, ("Unable to get public ips from node %u\n", i)); + goto again; + } + for (j=0; j<ips->num; j++) { + /* verify that we have the ip addresses we should have + and we dont have ones we shouldnt have. 
+ if we find an inconsistency we set recmode to + active on the local node and wait for the recmaster + to do a full blown recovery + */ + if (ips->ips[j].pnn == pnn) { + if (!ctdb_sys_have_ip(ips->ips[j].sin)) { + DEBUG(0,("Public address '%s' is missing and we should serve this ip\n", inet_ntoa(ips->ips[j].sin.sin_addr))); + ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, CTDB_RECOVERY_ACTIVE); + } + } else { + if (ctdb_sys_have_ip(ips->ips[j].sin)) { + DEBUG(0,("We are still serving a public address '%s' that we should not be serving.\n", inet_ntoa(ips->ips[j].sin.sin_addr))); + ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, CTDB_RECOVERY_ACTIVE); + } + } + } + } /* if we are not the recmaster then we do not need to check if recovery is needed -- 2.47.3