/*
get a list of databases off a remote node
*/
-int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, uint32_t destnode, struct ctdb_dbid_map *dbmap)
+int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_dbid_map *dbmap)
{
int ret;
TDB_DATA data, outdata;
ZERO_STRUCT(data);
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_GET_DBMAP, data,
- ctdb, &outdata, &res);
+ mem_ctx, &outdata, &res);
if (ret != 0 || res != 0) {
DEBUG(0,(__location__ " ctdb_control for getvnnmap failed\n"));
return -1;
}
dbmap->num = ((uint32_t *)outdata.dptr)[0];
- if (dbmap->dbids) {
- talloc_free(dbmap->dbids);
- dbmap->dbids=NULL;
- }
- dbmap->dbids=talloc_array(dbmap, uint32_t, dbmap->num);
+ dbmap->dbids=talloc_array(mem_ctx, uint32_t, dbmap->num);
if (!dbmap->dbids) {
DEBUG(0,(__location__ " failed to talloc dbmap\n"));
return -1;
/*
set vnn map on a node
*/
-int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, uint32_t destnode, struct ctdb_vnn_map *vnnmap)
+int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap)
{
int ret;
TDB_DATA *data, outdata;
int32_t i, res;
- data = talloc_zero(ctdb, TDB_DATA);
+ data = talloc_zero(mem_ctx, TDB_DATA);
data->dsize = (vnnmap->size+2)*sizeof(uint32_t);
data->dptr = (unsigned char *)talloc_array(data, uint32_t, vnnmap->size+2);
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_SETVNNMAP, *data,
- ctdb, &outdata, &res);
+ mem_ctx, &outdata, &res);
if (ret != 0 || res != 0) {
DEBUG(0,(__location__ " ctdb_control for setvnnmap failed\n"));
return -1;
/*
get all keys and records for a specific database
*/
-int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, struct ctdb_key_list *keys)
+int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid, uint32_t lmaster, TALLOC_CTX *mem_ctx, struct ctdb_key_list *keys)
{
int i, ret;
TDB_DATA indata, outdata;
int32_t res;
unsigned char *ptr;
- indata.dsize = sizeof(uint32_t);
- indata.dptr = (unsigned char *)&dbid;
+ indata.dsize = 2*sizeof(uint32_t);
+ indata.dptr = (unsigned char *)talloc_array(mem_ctx, uint32_t, 2);
+
+ ((uint32_t *)(&indata.dptr[0]))[0] = dbid;
+ ((uint32_t *)(&indata.dptr[0]))[1] = lmaster;
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_PULL_DB, indata,
/*
copy a tdb from one node to another node
*/
-int ctdb_ctrl_copydb(struct ctdb_context *ctdb, uint32_t sourcenode, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx)
+int ctdb_ctrl_copydb(struct ctdb_context *ctdb, uint32_t sourcenode, uint32_t destnode, uint32_t dbid, uint32_t lmaster, TALLOC_CTX *mem_ctx)
{
int ret;
TDB_DATA indata, outdata;
int32_t res;
- indata.dsize = sizeof(uint32_t);
- indata.dptr = (unsigned char *)&dbid;
+ indata.dsize = 2*sizeof(uint32_t);
+ indata.dptr = (unsigned char *)talloc_array(mem_ctx, uint32_t, 2);
+
+ ((uint32_t *)(&indata.dptr[0]))[0] = dbid;
+ ((uint32_t *)(&indata.dptr[0]))[1] = lmaster;
ret = ctdb_control(ctdb, sourcenode, 0,
CTDB_CONTROL_PULL_DB, indata,
struct getkeys_params {
struct ctdb_db_context *ctdb_db;
TDB_DATA *outdata;
+ uint32_t lmaster;
};
static int traverse_getkeys(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
struct ctdb_db_context *ctdb_db = talloc_get_type(params->ctdb_db, struct ctdb_db_context);
unsigned char *ptr;
int len;
+ uint32_t lmaster;
+
+ lmaster = ctdb_lmaster(ctdb_db->ctdb, &key);
+
+ /* only include this record if the lmaster matches or if
+ the wildcard lmaster (-1) was specified.
+ */
+ if((lmaster!=CTDB_LMASTER_ANY)
+ && (lmaster!=params->lmaster) ){
+ return 0;
+ }
len=outdata->dsize;
len+=4; /*lmaster*/
/* number of records is stored as the second 4 bytes */
((uint32_t *)(&outdata->dptr[0]))[1]++;
- *((uint32_t *)ptr)=ctdb_lmaster(ctdb_db->ctdb, &key);
+ *((uint32_t *)ptr)=lmaster;
ptr+=4;
*((uint32_t *)ptr)=key.dsize;
}
case CTDB_CONTROL_PULL_DB: {
- uint32_t dbid;
+ uint32_t dbid, lmaster;
struct ctdb_db_context *ctdb_db;
struct getkeys_params params;
- dbid = *((uint32_t *)(&indata.dptr[0]));
+ dbid = ((uint32_t *)(&indata.dptr[0]))[0];
ctdb_db = find_ctdb_db(ctdb, dbid);
if (!ctdb_db) {
DEBUG(0,(__location__ " Unknown db\n"));
return -1;
}
+ lmaster = ((uint32_t *)(&indata.dptr[0]))[1];
+
outdata->dsize = 2* sizeof(uint32_t);
outdata->dptr = (unsigned char *)talloc_array(outdata, uint32_t, 2);
((uint32_t *)(&outdata->dptr[0]))[0]=dbid;
params.ctdb_db = ctdb_db;
params.outdata = outdata;
+ params.lmaster = lmaster;
tdb_traverse_read(ctdb_db->ltdb->tdb, traverse_getkeys, ¶ms);
unsigned char *ptr;
int i, ret;
TDB_DATA key, data;
- struct ctdb_ltdb_header header;
+ struct ctdb_ltdb_header *hdr, header;
outdata->dsize = 0;
outdata->dptr = NULL;
ptr+=(key.dsize+CTDB_DS_ALIGNMENT-1)& ~(CTDB_DS_ALIGNMENT-1);
/* header */
- memcpy(&header, ptr, sizeof(struct ctdb_ltdb_header));
+ hdr = (struct ctdb_ltdb_header *)ptr;
ptr+=(sizeof(struct ctdb_ltdb_header)+CTDB_DS_ALIGNMENT-1)& ~(CTDB_DS_ALIGNMENT-1);
/* data */
DEBUG(0, (__location__ "Unable to lock db\n"));
return -1;
}
- ret = ctdb_ltdb_store(ctdb_db, key, &header, data);
+ ret = ctdb_ltdb_fetch(ctdb_db, key, &header, outdata, NULL);
if (ret != 0) {
- DEBUG(0, (__location__ "Unable to store record\n"));
+ DEBUG(0, (__location__ "Unable to fetch record\n"));
ctdb_ltdb_unlock(ctdb_db, key);
return -1;
}
+ if (header.rsn > hdr->rsn) {
+ ret = ctdb_ltdb_store(ctdb_db, key, hdr, data);
+ if (ret != 0) {
+ DEBUG(0, (__location__ "Unable to store record\n"));
+ ctdb_ltdb_unlock(ctdb_db, key);
+ return -1;
+ }
+ }
ctdb_ltdb_unlock(ctdb_db, key);
}
printf(" cleardb <vnn> <dbid> deletes all records in a db\n");
printf(" getrecmode <vnn> get recovery mode\n");
printf(" setrecmode <vnn> <mode> set recovery mode\n");
+ printf(" recover recover the cluster\n");
exit(1);
}
return 0;
}
+
+/*
+ perform a samba3 style recovery
+ */
+static int control_recover(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+ uint32_t vnn, num_nodes, generation, dmaster;
+ struct ctdb_vnn_map vnnmap;
+ struct ctdb_node_map nodemap;
+ int i, j, ret;
+ struct ctdb_dbid_map dbmap;
+
+ vnn = ctdb_get_vnn(ctdb);
+ printf("recover\n");
+ printf("this vnn:%d\n",vnn);
+
+ /* 1: find a list of all nodes */
+ printf("\n1: fetching list of nodes\n");
+ ret = ctdb_ctrl_getnodemap(ctdb, vnn, ctdb, &nodemap);
+ if (ret != 0) {
+ printf("Unable to get nodemap from node %u\n", vnn);
+ return ret;
+ }
+
+ /* 2: count the active nodes */
+ printf("\n2: count number of active nodes\n");
+ num_nodes = 0;
+ for (i=0; i<nodemap.num; i++) {
+ if (nodemap.nodes[i].flags&NODE_FLAGS_CONNECTED) {
+ num_nodes++;
+ }
+ }
+ printf("number of active nodes:%d\n",num_nodes);
+
+ /* 3: go to all active nodes and activate recovery mode */
+ printf("\n3: set recovery mode for all active nodes\n");
+ for (j=0; j<nodemap.num; j++) {
+ /* dont change it for nodes that are unavailable */
+ if (!(nodemap.nodes[j].flags&NODE_FLAGS_CONNECTED)) {
+ continue;
+ }
+
+ printf("setting node %d to recovery mode\n",nodemap.nodes[j].vnn);
+ ret = ctdb_ctrl_setrecmode(ctdb, nodemap.nodes[j].vnn, CTDB_RECOVERY_ACTIVE);
+ if (ret != 0) {
+ printf("Unable to set recmode on node %u\n", nodemap.nodes[j].vnn);
+ return ret;
+ }
+ }
+
+ /* 4: get a list of all databases */
+ printf("\n4: getting list of databases to recover\n");
+ ret = ctdb_ctrl_getdbmap(ctdb, vnn, ctdb, &dbmap);
+ if (ret != 0) {
+ printf("Unable to get dbids from node %u\n", vnn);
+ return ret;
+ }
+ for (i=0;i<dbmap.num;i++) {
+ const char *path;
+
+ ctdb_ctrl_getdbpath(ctdb, dbmap.dbids[i], ctdb, &path);
+ printf("dbid:0x%08x path:%s\n", dbmap.dbids[i], path);
+ }
+
+ /* 5: pull all records from all other nodes across to this node
+ (this merges based on rsn internally)
+ */
+ printf("\n5: merge all records from remote nodes\n");
+ for (i=0;i<dbmap.num;i++) {
+ printf("recovering database 0x%08x\n",dbmap.dbids[i]);
+ for (j=0; j<nodemap.num; j++) {
+ /* we dont need to merge with ourselves */
+ if (nodemap.nodes[j].vnn == vnn) {
+ continue;
+ }
+ /* dont merge from nodes that are unavailable */
+ if (!(nodemap.nodes[j].flags&NODE_FLAGS_CONNECTED)) {
+ continue;
+ }
+
+ printf("merging all records from node %d for database 0x%08x\n", nodemap.nodes[j].vnn, dbmap.dbids[i]);
+ ret = ctdb_ctrl_copydb(ctdb, nodemap.nodes[j].vnn, vnn, dbmap.dbids[i], CTDB_LMASTER_ANY, ctdb);
+ if (ret != 0) {
+ printf("Unable to copy db from node %u to node %u\n", nodemap.nodes[j].vnn, vnn);
+ return ret;
+ }
+ }
+ }
+
+ /* 6: update dmaster to point to this node for all databases/nodes */
+ printf("\n6: repoint dmaster to the recovery node\n");
+ dmaster = vnn;
+ printf("new dmaster is %d\n", dmaster);
+ for (i=0;i<dbmap.num;i++) {
+ for (j=0; j<nodemap.num; j++) {
+ /* dont repoint nodes that are unavailable */
+ if (!(nodemap.nodes[j].flags&NODE_FLAGS_CONNECTED)) {
+ continue;
+ }
+
+ printf("setting dmaster to %d for node %d db 0x%08x\n",dmaster,nodemap.nodes[j].vnn,dbmap.dbids[i]);
+ ret = ctdb_ctrl_setdmaster(ctdb, nodemap.nodes[j].vnn, ctdb, dbmap.dbids[i], dmaster);
+ if (ret != 0) {
+ printf("Unable to set dmaster for node %u db:0x%08x\n", nodemap.nodes[j].vnn, dbmap.dbids[i]);
+ return ret;
+ }
+ }
+ }
+
+ /* 7: push all records out to the nodes again */
+ printf("\n7: push all records to remote nodes\n");
+ for (i=0;i<dbmap.num;i++) {
+ printf("distributing new database 0x%08x\n",dbmap.dbids[i]);
+ for (j=0; j<nodemap.num; j++) {
+ /* we dont need to push to ourselves */
+ if (nodemap.nodes[j].vnn == vnn) {
+ continue;
+ }
+ /* dont push to nodes that are unavailable */
+ if (!(nodemap.nodes[j].flags&NODE_FLAGS_CONNECTED)) {
+ continue;
+ }
+
+ printf("pushing all records to node %d for database 0x%08x\n", nodemap.nodes[j].vnn, dbmap.dbids[i]);
+ ret = ctdb_ctrl_copydb(ctdb, vnn, nodemap.nodes[j].vnn, dbmap.dbids[i], CTDB_LMASTER_ANY, ctdb);
+ if (ret != 0) {
+ printf("Unable to copy db from node %u to node %u\n", vnn, nodemap.nodes[j].vnn);
+ return ret;
+ }
+ }
+ }
+
+ /* 8: build a new vnn map */
+ printf("\n8: build a new vnn map with a new generation id\n");
+ generation = random();
+ vnnmap.generation = generation;
+ vnnmap.size = num_nodes;
+ vnnmap.map = talloc_array(ctdb, uint32_t, num_nodes);
+ for (i=j=0;i<nodemap.num;i++) {
+ if (nodemap.nodes[i].flags&NODE_FLAGS_CONNECTED) {
+ vnnmap.map[j++]=nodemap.nodes[i].vnn;
+ }
+ }
+ printf("Generation:%d\n",vnnmap.generation);
+ printf("Size:%d\n",vnnmap.size);
+ for(i=0;i<vnnmap.size;i++){
+ printf("hash:%d lmaster:%d\n",i,vnnmap.map[i]);
+ }
+
+ /* 9: push the new vnn map out to all the nodes */
+ printf("\n9: distribute the new vnn map\n");
+ for (j=0; j<nodemap.num; j++) {
+ /* dont push to nodes that are unavailable */
+ if (!(nodemap.nodes[j].flags&NODE_FLAGS_CONNECTED)) {
+ continue;
+ }
+
+ printf("setting new vnn map on node %d\n",nodemap.nodes[j].vnn);
+ ret = ctdb_ctrl_setvnnmap(ctdb, nodemap.nodes[j].vnn, ctdb, &vnnmap);
+ if (ret != 0) {
+ printf("Unable to set vnnmap for node %u\n", vnn);
+ return ret;
+ }
+ }
+
+ /* 10: disable recovery mode */
+ printf("\n10: restore recovery mode back to normal\n");
+ for (j=0; j<nodemap.num; j++) {
+ /* dont push to nodes that are unavailable */
+ if (!(nodemap.nodes[j].flags&NODE_FLAGS_CONNECTED)) {
+ continue;
+ }
+
+ printf("changing recovery mode back to normal for node %d\n",nodemap.nodes[j].vnn);
+ ret = ctdb_ctrl_setrecmode(ctdb, nodemap.nodes[j].vnn, CTDB_RECOVERY_NORMAL);
+ if (ret != 0) {
+ printf("Unable to set recmode on node %u\n", nodemap.nodes[j].vnn);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
/*
display remote ctdb vnn map
*/
dbid = strtoul(argv[1], NULL, 0);
mem_ctx = talloc_new(ctdb);
- ret = ctdb_ctrl_pulldb(ctdb, vnn, dbid, mem_ctx, &keys);
+ ret = ctdb_ctrl_pulldb(ctdb, vnn, dbid, CTDB_LMASTER_ANY, mem_ctx, &keys);
if (ret != 0) {
printf("Unable to get keys from node %u\n", vnn);
return ret;
dbid = strtoul(argv[2], NULL, 0);
mem_ctx = talloc_new(ctdb);
- ret = ctdb_ctrl_copydb(ctdb, fromvnn, tovnn, dbid, mem_ctx);
+ ret = ctdb_ctrl_copydb(ctdb, fromvnn, tovnn, dbid, CTDB_LMASTER_ANY, mem_ctx);
if (ret != 0) {
printf("Unable to copy db from node %u to node %u\n", fromvnn, tovnn);
return ret;
{
uint32_t vnn;
int i, ret;
- struct ctdb_dbid_map *dbmap;
+ struct ctdb_dbid_map dbmap;
if (argc < 1) {
usage();
vnn = strtoul(argv[0], NULL, 0);
- dbmap = talloc_zero(ctdb, struct ctdb_dbid_map);
- ret = ctdb_ctrl_getdbmap(ctdb, vnn, dbmap);
+ ret = ctdb_ctrl_getdbmap(ctdb, vnn, ctdb, &dbmap);
if (ret != 0) {
printf("Unable to get dbids from node %u\n", vnn);
- talloc_free(dbmap);
return ret;
}
- printf("Number of databases:%d\n", dbmap->num);
- for(i=0;i<dbmap->num;i++){
+ printf("Number of databases:%d\n", dbmap.num);
+ for(i=0;i<dbmap.num;i++){
const char *path;
- ctdb_ctrl_getdbpath(ctdb, dbmap->dbids[i], dbmap, &path);
- printf("dbid:0x%08x path:%s\n", dbmap->dbids[i], path);
+ ctdb_ctrl_getdbpath(ctdb, dbmap.dbids[i], ctdb, &path);
+ printf("dbid:0x%08x path:%s\n", dbmap.dbids[i], path);
}
- talloc_free(dbmap);
+
return 0;
}
vnnmap->map[i] = strtoul(argv[3+i], NULL, 0);
}
- ret = ctdb_ctrl_setvnnmap(ctdb, vnn, vnnmap);
+ ret = ctdb_ctrl_setvnnmap(ctdb, vnn, ctdb, vnnmap);
if (ret != 0) {
printf("Unable to set vnnmap for node %u\n", vnn);
return ret;
ret = control_debug(ctdb, extra_argc-1, extra_argv+1);
} else if (strcmp(control, "debuglevel") == 0) {
ret = control_debuglevel(ctdb, extra_argc-1, extra_argv+1);
+ } else if (strcmp(control, "recover") == 0) {
+ ret = control_recover(ctdb, extra_argc-1, extra_argv+1);
} else {
printf("Unknown control '%s'\n", control);
exit(1);