From: Andrew Tridgell Date: Mon, 10 Sep 2007 03:21:11 +0000 (+1000) Subject: merge from ronnie X-Git-Tag: tevent-0.9.20~348^2~2430 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=42168177ef3a75534827e3e4b7d143d273bff465;p=thirdparty%2Fsamba.git merge from ronnie (This used to be ctdb commit 1f21d4d563232926c35d03c4d69eb69190823dc6) --- 42168177ef3a75534827e3e4b7d143d273bff465 diff --cc ctdb/server/ctdb_takeover.c index 89787caf1be,b252b07737d..71c294da44c --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@@ -514,50 -657,146 +657,146 @@@ int ctdb_takeover_run(struct ctdb_conte ZERO_STRUCT(ip); - /* Work out which node will look after each public IP. - * takeover_node cycles over the nodes and is incremented each time a - * node has been assigned to take over for another node. - * This spreads the failed nodes out across the remaining - * nodes more evenly - */ + /* Count how many completely healthy nodes we have */ + num_healthy = 0; for (i=0;i<nodemap->num;i++) { if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) { - ctdb->nodes[i]->takeover_vnn = nodemap->nodes[i].vnn; - } else { - uint32_t takeover_vnn; + num_healthy++; + } + } - /* If this public address has already been taken over - by a node and that node is still healthy, then - leave the public address at that node. 
- */ - takeover_vnn = ctdb->nodes[i]->takeover_vnn; - if ( ctdb_validate_vnn(ctdb, takeover_vnn) - && (!(nodemap->nodes[takeover_vnn].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) ) { - continue; + if (num_healthy > 0) { + /* We have healthy nodes, so only consider them for + serving public addresses + */ + mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED; + } else { + /* We didnt have any completely healthy nodes so + use "disabled" nodes as a fallback + */ + mask = NODE_FLAGS_INACTIVE; + } + + /* since nodes only know about those public addresses that + can be served by that particular node, no single node has + a full list of all public addresses that exist in the cluster. + Walk over all node structures and create a merged list of + all public addresses that exist in the cluster. + */ + all_ips = create_merged_ip_list(ctdb, tmp_ctx); + + + /* mark all public addresses with a masked node as being served by + node -1 + */ + for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) { + if (tmp_ip->pnn == -1) { + continue; + } + if (nodemap->nodes[tmp_ip->pnn].flags & mask) { + tmp_ip->pnn = -1; + } + } + + + /* now we must redistribute all public addresses with takeover node + -1 among the nodes available + */ + retries = 0; + try_again: + /* loop over all ip's and find a physical node to cover for + each unassigned ip. + */ + for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) { + if (tmp_ip->pnn == -1) { + if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) { + DEBUG(0,("Failed to find node to cover ip %s\n", inet_ntoa(tmp_ip->sin.sin_addr))); } + } + } + + + /* now, try to make sure the ip adresses are evenly distributed + across the node. + for each ip address, loop over all nodes that can serve this + ip and make sure that the difference between the node + serving the most and the node serving the least ip's are not greater + than 1. 
+ */ + for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) { + if (tmp_ip->pnn == -1) { + continue; + } + /* Get the highest and lowest number of ips's served by any + valid node which can serve this ip. + */ + maxnode = -1; + minnode = -1; + for (i=0;i<nodemap->num;i++) { + if (nodemap->nodes[i].flags & mask) { + continue; + } - ctdb->nodes[i]->takeover_vnn = (uint32_t)-1; + /* only check nodes that can actually serve this ip */ + if (can_node_serve_ip(ctdb, i, tmp_ip)) { + /* no it couldnt so skip to the next node */ + continue; + } - ctdb_takeover_find_node(ctdb, nodemap, i, NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED); - - /* if no enabled node can take it, then we - might as well use any active node. It - probably means that some subsystem (such as - NFS) is sick on all nodes. Best we can do - is to keep the other services up. */ - if (ctdb->nodes[i]->takeover_vnn == (uint32_t)-1) { - ctdb_takeover_find_node(ctdb, nodemap, i, NODE_FLAGS_INACTIVE); + num = node_ip_coverage(ctdb, i, all_ips); + if (maxnode == -1) { + maxnode = i; + maxnum = num; + } else { + if (num > maxnum) { + maxnode = i; + maxnum = num; + } } + if (minnode == -1) { + minnode = i; + minnum = num; + } else { + if (num < minnum) { + minnode = i; + minnum = num; + } + } + } + if (maxnode == -1) { - DEBUG(0,(__location__ " Could not find maxnode. May not be able to server ip '%s'\n", inet_ntoa(tmp_ip->sin.sin_addr))); ++ DEBUG(0,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n", inet_ntoa(tmp_ip->sin.sin_addr))); + continue; + } + + /* if the spread between the smallest and largest coverage by + a node is >=2 we steal one of the ips from the node with + most coverage to even things out a bit. + try to do this at most 5 times since we dont want to spend + too much time balancing the ip coverage. 
+ */ + if ( (maxnum > minnum+1) + && (retries < 5) ){ + struct ctdb_public_ip_list *tmp; - if (ctdb->nodes[i]->takeover_vnn == (uint32_t)-1) { - DEBUG(0,(__location__ " No node available on same network to take %s\n", - ctdb->nodes[i]->public_address)); + /* mark one of maxnode's vnn's as unassigned and try + again + */ + for (tmp=all_ips;tmp;tmp=tmp->next) { + if (tmp->pnn == maxnode) { + tmp->pnn = -1; + retries++; + goto try_again; + } } } - } + } + - /* at this point ctdb->nodes[i]->takeover_vnn is the vnn which will own each IP */ + + /* at this point ->pnn is the node which will own each IP + or -1 if there is no node that can cover this ip + */ /* now tell all nodes to delete any alias that they should not have. This will be a NOOP on nodes that don't currently