From 9cf77dd23f26b5fdbf2c46200f1f0f28d4aac145 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Sat, 12 May 2007 15:15:27 +1000 Subject: [PATCH] separate out the freeze/thaw handling from recovery (This used to be ctdb commit 0b0640bd8b8334961f240e0cf276ac112cd6e616) --- ctdb/Makefile.in | 2 +- ctdb/common/ctdb_client.c | 40 +++++- ctdb/common/ctdb_control.c | 40 +++--- ctdb/common/ctdb_freeze.c | 262 ++++++++++++++++++++++++++++++++++++ ctdb/common/ctdb_recover.c | 232 ++----------------------------- ctdb/direct/ctdb_recoverd.c | 16 +++ ctdb/include/ctdb.h | 3 + ctdb/include/ctdb_private.h | 11 +- ctdb/tools/ctdb_control.c | 77 +++++++++++ 9 files changed, 440 insertions(+), 243 deletions(-) create mode 100644 ctdb/common/ctdb_freeze.c diff --git a/ctdb/Makefile.in b/ctdb/Makefile.in index dd6ab03978b..58bc2694f9b 100644 --- a/ctdb/Makefile.in +++ b/ctdb/Makefile.in @@ -29,7 +29,7 @@ CTDB_COMMON_OBJ = common/ctdb.o common/ctdb_daemon.o common/ctdb_client.o \ common/ctdb_io.o common/util.o common/ctdb_util.o \ common/ctdb_call.o common/ctdb_ltdb.o common/ctdb_lockwait.o \ common/ctdb_message.o common/cmdline.o common/ctdb_control.o \ - lib/util/debug.o common/ctdb_recover.o common/ctdb_traverse.o + lib/util/debug.o common/ctdb_recover.o common/ctdb_freeze.o common/ctdb_traverse.o CTDB_TCP_OBJ = tcp/tcp_connect.o tcp/tcp_io.o tcp/tcp_init.o diff --git a/ctdb/common/ctdb_client.c b/ctdb/common/ctdb_client.c index e568acd2c74..db545e017d4 100644 --- a/ctdb/common/ctdb_client.c +++ b/ctdb/common/ctdb_client.c @@ -937,7 +937,7 @@ int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, ui CTDB_CONTROL_SET_RECMASTER, 0, data, ctdb, &outdata, &res, &timeout); if (ret != 0 || res != 0) { - DEBUG(0,(__location__ " ctdb_control for getrecmode failed\n")); + DEBUG(0,(__location__ " ctdb_control for setrecmaster failed\n")); return -1; } @@ -1640,3 +1640,41 @@ int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t return 0; } + 
+/*
+  freeze a node
+ */
+int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+{
+	int ret;
+	int32_t res;
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_FREEZE, 0, tdb_null,
+			   NULL, NULL, &res, &timeout);
+	if (ret != 0 || res != 0) {
+		DEBUG(0,(__location__ " ctdb_control freeze failed\n"));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+  thaw a node
+ */
+int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+{
+	int ret;
+	int32_t res;
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_THAW, 0, tdb_null,
+			   NULL, NULL, &res, &timeout);
+	if (ret != 0 || res != 0) {
+		DEBUG(0,(__location__ " ctdb_control thaw failed\n"));
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/ctdb/common/ctdb_control.c b/ctdb/common/ctdb_control.c
index f921d4fa1bf..af8df80e958 100644
--- a/ctdb/common/ctdb_control.c
+++ b/ctdb/common/ctdb_control.c
@@ -38,11 +38,15 @@ struct ctdb_control_state {
   process a control request
 */
 static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
-				     uint32_t opcode,
-				     uint64_t srvid, uint32_t client_id,
+				     struct ctdb_req_control *c,
 				     TDB_DATA indata,
-				     TDB_DATA *outdata, uint32_t srcnode)
+				     TDB_DATA *outdata, uint32_t srcnode,
+				     bool *async_reply)
 {
+	uint32_t opcode = c->opcode;
+	uint64_t srvid = c->srvid;
+	uint32_t client_id = c->client_id;
+
 	switch (opcode) {
 	case CTDB_CONTROL_PROCESS_EXISTS: {
 		CHECK_CONTROL_DATA_SIZE(sizeof(pid_t));
@@ -111,11 +115,6 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 	case CTDB_CONTROL_PUSH_DB:
 		return ctdb_control_push_db(ctdb, indata);
 
-	case CTDB_CONTROL_SET_RECMODE: {
-		ctdb->recovery_mode = ((uint32_t *)(&indata.dptr[0]))[0];
-		return 0;
-	}
-
 	case CTDB_CONTROL_GET_RECMODE: {
 		return ctdb->recovery_mode;
 	}
@@ -221,6 +220,18 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 		CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
 		return ctdb_ltdb_set_seqnum_frequency(ctdb, *(uint32_t *)indata.dptr);
 
+	case CTDB_CONTROL_FREEZE:
+		CHECK_CONTROL_DATA_SIZE(0);
+		return ctdb_control_freeze(ctdb, c, async_reply);
+
+	case CTDB_CONTROL_THAW:
+		CHECK_CONTROL_DATA_SIZE(0);
+		return ctdb_control_thaw(ctdb);
+
+	case CTDB_CONTROL_SET_RECMODE:
+		CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+		return ctdb_control_set_recmode(ctdb, indata);
+
 	default:
 		DEBUG(0,(__location__ " Unknown CTDB control opcode %u\n", opcode));
 		return -1;
@@ -267,21 +278,18 @@ void ctdb_request_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr
 	struct ctdb_req_control *c = (struct ctdb_req_control *)hdr;
 	TDB_DATA data, *outdata;
 	int32_t status;
+	bool async_reply = False;
 
 	data.dptr = &c->data[0];
 	data.dsize = c->datalen;
 
 	outdata = talloc_zero(c, TDB_DATA);
 
-	if (c->opcode == CTDB_CONTROL_SET_RECMODE) {
-		/* this function operates asynchronously */
-		ctdb_control_set_recmode(ctdb, c, data);
-		return;
-	}
+	status = ctdb_control_dispatch(ctdb, c, data, outdata, hdr->srcnode, &async_reply);
 
-	status = ctdb_control_dispatch(ctdb, c->opcode, c->srvid, c->client_id,
-				       data, outdata, hdr->srcnode);
-	ctdb_request_control_reply(ctdb, c, outdata, status);
+	if (!async_reply) {
+		ctdb_request_control_reply(ctdb, c, outdata, status);
+	}
 }
 
 /*
diff --git a/ctdb/common/ctdb_freeze.c b/ctdb/common/ctdb_freeze.c
new file mode 100644
index 00000000000..b31751d8523
--- /dev/null
+++ b/ctdb/common/ctdb_freeze.c
@@ -0,0 +1,321 @@
+/*
+   ctdb freeze handling
+
+   Copyright (C) Andrew Tridgell  2007
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include "includes.h"
+#include "lib/events/events.h"
+#include "lib/tdb/include/tdb.h"
+#include "system/network.h"
+#include "system/filesys.h"
+#include "system/wait.h"
+#include "../include/ctdb_private.h"
+#include "lib/util/dlinklist.h"
+#include "db_wrap.h"
+
+
+/*
+  lock all databases
+
+  calls tdb_lockall() on every database in ctdb->db_list. Returns 0 when
+  all of them succeeded, or -1 as soon as one fails; locks already taken
+  at that point are not released here
+ */
+static int ctdb_lock_all_databases(struct ctdb_context *ctdb)
+{
+	struct ctdb_db_context *ctdb_db;
+	for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
+		if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+  lock all databases - mark only
+
+  same walk as ctdb_lock_all_databases() but using tdb_lockall_mark().
+  Returns 0 on success, -1 at the first failure
+ */
+static int ctdb_lock_all_databases_mark(struct ctdb_context *ctdb)
+{
+	struct ctdb_db_context *ctdb_db;
+	for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
+		if (tdb_lockall_mark(ctdb_db->ltdb->tdb) != 0) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+  lock all databases - unmark only
+
+  same walk as ctdb_lock_all_databases() but using tdb_lockall_unmark().
+  Returns 0 on success, -1 at the first failure
+ */
+static int ctdb_lock_all_databases_unmark(struct ctdb_context *ctdb)
+{
+	struct ctdb_db_context *ctdb_db;
+	for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
+		if (tdb_lockall_unmark(ctdb_db->ltdb->tdb) != 0) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+  a list of control requests waiting for a freeze lock child to get
+  the database locks
+
+  the reply to 'c' is sent with 'status' from the waiter's talloc
+  destructor, so freeing a waiter is what answers the request
+ */
+struct ctdb_freeze_waiter {
+	struct ctdb_freeze_waiter *next, *prev;
+	struct ctdb_context *ctdb;
+	struct ctdb_req_control *c;
+	int32_t status;
+};
+
+/* a handle to a freeze lock child process */
+struct ctdb_freeze_handle {
+	struct ctdb_context *ctdb;
+	pid_t child;	/* pid of the child taking the locks */
+	int fd;		/* our end of the socketpair to the child */
+	struct ctdb_freeze_waiter *waiters;
+};
+
+/*
+  destroy a freeze handle
+
+  drops the lock marks if we were frozen, resets freeze_mode to
+  CTDB_FREEZE_NONE, then kills and reaps the lock child.
+  ctdb->freeze_handle is not cleared here: the waiters are allocated
+  under the handle and their destructor still dereferences that pointer
+ */
+static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
+{
+	if (h->ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
+		ctdb_lock_all_databases_unmark(h->ctdb);
+	}
+	h->ctdb->freeze_mode = CTDB_FREEZE_NONE;
+	kill(h->child, SIGKILL);
+	waitpid(h->child, NULL, 0);
+	return 0;
+}
+
+/*
+  give up on a freeze attempt
+
+  frees the handle (which replies to any waiters with their current
+  status) and then clears ctdb->freeze_handle so that a later freeze or
+  thaw does not touch the freed handle
+ */
+static void ctdb_freeze_abort(struct ctdb_freeze_handle *h)
+{
+	struct ctdb_context *ctdb = h->ctdb;
+
+	talloc_free(h);
+	ctdb->freeze_handle = NULL;
+}
+
+/*
+  called when the child writes its status to us
+
+  a short read (including EOF when the child exited without getting the
+  locks) is treated as failure. On failure the freeze attempt is
+  abandoned; on success the locks are marked in this process, the node
+  becomes CTDB_FREEZE_FROZEN and every waiter is answered
+ */
+static void ctdb_freeze_lock_handler(struct event_context *ev, struct fd_event *fde,
+				       uint16_t flags, void *private_data)
+{
+	struct ctdb_freeze_handle *h = talloc_get_type(private_data, struct ctdb_freeze_handle);
+	int32_t status;
+	struct ctdb_freeze_waiter *w;
+	int ret;
+
+	if (read(h->fd, &status, sizeof(status)) != sizeof(status)) {
+		DEBUG(0,("read error from freeze lock child\n"));
+		status = -1;
+	}
+
+	if (status == -1) {
+		DEBUG(0,("Failed to get locks in ctdb_freeze_child\n"));
+		/* we didn't get the locks - destroy the handle */
+		ctdb_freeze_abort(h);
+		return;
+	}
+
+	ret = ctdb_lock_all_databases_mark(h->ctdb);
+	if (ret == -1) {
+		DEBUG(0,("Failed to mark locks in ctdb_freeze\n"));
+		ctdb_freeze_abort(h);
+		return;
+	}
+
+	h->ctdb->freeze_mode = CTDB_FREEZE_FROZEN;
+
+	/* notify the waiters - freeing a waiter sends its reply */
+	while ((w = h->ctdb->freeze_handle->waiters)) {
+		w->status = status;
+		DLIST_REMOVE(h->ctdb->freeze_handle->waiters, w);
+		talloc_free(w);
+	}
+
+	talloc_free(fde);
+}
+
+/*
+  create a child which gets locks on all the open databases, then calls the callback telling the parent
+  that it is done
+
+  returns the new handle, or NULL on failure. The caller is expected to
+  store the handle in ctdb->freeze_handle
+ */
+static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb)
+{
+	struct ctdb_freeze_handle *h;
+	int fd[2];
+	struct fd_event *fde;
+
+	h = talloc_zero(ctdb, struct ctdb_freeze_handle);
+	if (h == NULL) {
+		/* this function returns a pointer, so report failure as NULL */
+		DEBUG(0,("Out of memory in ctdb_freeze_lock\n"));
+		return NULL;
+	}
+
+	h->ctdb = ctdb;
+
+	/* use socketpair() instead of pipe() so we have bi-directional fds */
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) != 0) {
+		DEBUG(0,("Failed to create pipe for ctdb_freeze_lock\n"));
+		talloc_free(h);
+		return NULL;
+	}
+
+	h->child = fork();
+	if (h->child == -1) {
+		DEBUG(0,("Failed to fork child for ctdb_freeze_lock\n"));
+		/* no child was created, so neither end is needed any more */
+		close(fd[0]);
+		close(fd[1]);
+		talloc_free(h);
+		return NULL;
+	}
+
+	if (h->child == 0) {
+		int ret;
+		/* in the child */
+		close(fd[0]);
+		ret = ctdb_lock_all_databases(ctdb);
+		if (ret != 0) {
+			/* exiting without writing makes the parent's read fail */
+			_exit(0);
+		}
+		write(fd[1], &ret, sizeof(ret));
+		/* the read here means we will die if the parent exits */
+		read(fd[1], &ret, sizeof(ret));
+		_exit(0);
+	}
+
+	talloc_set_destructor(h, ctdb_freeze_handle_destructor);
+
+	close(fd[1]);
+
+	h->fd = fd[0];
+
+	fde = event_add_fd(ctdb->ev, h, h->fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
+			   ctdb_freeze_lock_handler, h);
+	if (fde == NULL) {
+		DEBUG(0,("Failed to setup fd event for ctdb_freeze_lock\n"));
+		close(fd[0]);
+		talloc_free(h);
+		return NULL;
+	}
+
+	return h;
+}
+
+/*
+  destroy a waiter for a freeze mode change
+
+  unlinks the waiter from the handle's list and sends the control reply
+  carrying w->status
+ */
+static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
+{
+	DLIST_REMOVE(w->ctdb->freeze_handle->waiters, w);
+	ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status);
+	return 0;
+}
+
+/*
+  freeze the databases
+
+  returns 0 straight away if the node is already frozen. Otherwise the
+  request is queued behind the lock child (starting one if needed),
+  *async_reply is set and the reply is sent once the outcome is known.
+  CTDB_NO_MEMORY bails out if the child or the waiter cannot be set up
+ */
+int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
+{
+	struct ctdb_freeze_waiter *w;
+
+	if (ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
+		/* we're already frozen */
+		return 0;
+	}
+
+	/* if there isn't a freeze lock child then create one */
+	if (!ctdb->freeze_handle) {
+		ctdb->freeze_handle = ctdb_freeze_lock(ctdb);
+		CTDB_NO_MEMORY(ctdb, ctdb->freeze_handle);
+		ctdb->freeze_mode = CTDB_FREEZE_PENDING;
+	}
+
+	/* add ourselves to list of waiters */
+	w = talloc(ctdb->freeze_handle, struct ctdb_freeze_waiter);
+	CTDB_NO_MEMORY(ctdb, w);
+	w->ctdb = ctdb;
+	w->c    = talloc_steal(w, c);
+	w->status = -1;
+	talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
+	DLIST_ADD(ctdb->freeze_handle->waiters, w);
+
+	/* we won't reply till later */
+	*async_reply = True;
+	return 0;
+}
+
+
+
+/*
+  thaw the databases
+
+  frees the freeze handle, if any, and always returns 0
+ */
+int32_t ctdb_control_thaw(struct ctdb_context *ctdb)
+{
+	talloc_free(ctdb->freeze_handle);
+	ctdb->freeze_handle = NULL;
+	return 0;
+}
diff --git a/ctdb/common/ctdb_recover.c b/ctdb/common/ctdb_recover.c
index 419426f76fa..1156a37e789 100644
--- 
a/ctdb/common/ctdb_recover.c +++ b/ctdb/common/ctdb_recover.c @@ -1,5 +1,5 @@ /* - ctdb_control protocol code + ctdb recovery code Copyright (C) Andrew Tridgell 2007 Copyright (C) Ronnie Sahlberg 2007 @@ -368,231 +368,17 @@ int32_t ctdb_control_clear_db(struct ctdb_context *ctdb, TDB_DATA indata) return 0; } - -/* - lock all databases - */ -static int ctdb_lock_all_databases(struct ctdb_context *ctdb) -{ - struct ctdb_db_context *ctdb_db; - for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) { - if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) { - return -1; - } - } - return 0; -} - -/* - lock all databases - mark only - */ -static int ctdb_lock_all_databases_mark(struct ctdb_context *ctdb) -{ - struct ctdb_db_context *ctdb_db; - for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) { - if (tdb_lockall_mark(ctdb_db->ltdb->tdb) != 0) { - return -1; - } - } - return 0; -} - -/* - lock all databases - unmark only - */ -static int ctdb_lock_all_databases_unmark(struct ctdb_context *ctdb) -{ - struct ctdb_db_context *ctdb_db; - for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) { - if (tdb_lockall_unmark(ctdb_db->ltdb->tdb) != 0) { - return -1; - } - } - return 0; -} - -/* - a list of control requests waiting for a recovery lock child to gets - the database locks - */ -struct ctdb_recovery_waiter { - struct ctdb_recovery_waiter *next, *prev; - struct ctdb_context *ctdb; - struct ctdb_req_control *c; - int32_t status; -}; - -/* a handle to a recovery lock child process */ -struct ctdb_recovery_handle { - struct ctdb_context *ctdb; - pid_t child; - int fd; - struct ctdb_recovery_waiter *waiters; -}; - -/* - destroy a recovery handle - */ -static int ctdb_recovery_handle_destructor(struct ctdb_recovery_handle *h) -{ - if (h->ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE) { - ctdb_lock_all_databases_unmark(h->ctdb); - } - kill(h->child, SIGKILL); - waitpid(h->child, NULL, 0); - return 0; -} - -/* - called when the child writes its status to us - */ -static void 
ctdb_recovery_lock_handler(struct event_context *ev, struct fd_event *fde, - uint16_t flags, void *private_data) -{ - struct ctdb_recovery_handle *h = talloc_get_type(private_data, struct ctdb_recovery_handle); - int32_t status; - struct ctdb_recovery_waiter *w; - int ret; - - if (read(h->fd, &status, sizeof(status)) != sizeof(status)) { - DEBUG(0,("read error from recovery lock child\n")); - status = -1; - } - - if (status == -1) { - DEBUG(0,("Failed to get locks in ctdb_recovery_child\n")); - /* we didn't get the locks - destroy the handle */ - talloc_free(h); - return; - } - - ret = ctdb_lock_all_databases_mark(h->ctdb); - if (ret == -1) { - DEBUG(0,("Failed to mark locks in ctdb_recovery\n")); - talloc_free(h); - return; - } - - h->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE; - - /* notify the waiters */ - while ((w = h->ctdb->recovery_handle->waiters)) { - w->status = status; - DLIST_REMOVE(h->ctdb->recovery_handle->waiters, w); - talloc_free(w); - } - - talloc_free(fde); -} - -/* - create a child which gets locks on all the open databases, then calls the callback telling the parent - that it is done - */ -static struct ctdb_recovery_handle *ctdb_recovery_lock(struct ctdb_context *ctdb) -{ - struct ctdb_recovery_handle *h; - int fd[2]; - struct fd_event *fde; - - h = talloc_zero(ctdb, struct ctdb_recovery_handle); - CTDB_NO_MEMORY_VOID(ctdb, h); - - h->ctdb = ctdb; - - /* use socketpair() instead of pipe() so we have bi-directional fds */ - if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) != 0) { - DEBUG(0,("Failed to create pipe for ctdb_recovery_lock\n")); - talloc_free(h); - return NULL; - } - - h->child = fork(); - if (h->child == -1) { - DEBUG(0,("Failed to fork child for ctdb_recovery_lock\n")); - talloc_free(h); - return NULL; - } - - if (h->child == 0) { - int ret; - /* in the child */ - close(fd[0]); - ret = ctdb_lock_all_databases(ctdb); - if (ret != 0) { - _exit(0); - } - write(fd[1], &ret, sizeof(ret)); - /* the read here means we will die if the parent 
exits */ - read(fd[1], &ret, sizeof(ret)); - _exit(0); - } - - talloc_set_destructor(h, ctdb_recovery_handle_destructor); - - close(fd[1]); - - h->fd = fd[0]; - - fde = event_add_fd(ctdb->ev, h, h->fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE, - ctdb_recovery_lock_handler, h); - if (fde == NULL) { - DEBUG(0,("Failed to setup fd event for ctdb_recovery_lock\n")); - close(fd[0]); - talloc_free(h); - return NULL; - } - - return h; -} - -/* - destroy a waiter for a recovery mode change - */ -static int ctdb_recovery_waiter_destructor(struct ctdb_recovery_waiter *w) -{ - DLIST_REMOVE(w->ctdb->recovery_handle->waiters, w); - ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status); - return 0; -} - /* set the recovery mode */ -void ctdb_control_set_recmode(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA data) +int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb, TDB_DATA indata) { - uint32_t recmode = *(uint32_t *)data.dptr; - struct ctdb_recovery_waiter *w; - - if (recmode == CTDB_RECOVERY_NORMAL) { - /* switching to normal mode is easy */ - talloc_free(ctdb->recovery_handle); - ctdb->recovery_handle = NULL; - ctdb->recovery_mode = CTDB_RECOVERY_NORMAL; - ctdb_request_control_reply(ctdb, c, NULL, 0); - return; - } - - if (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE) { - /* we're already active */ - ctdb_request_control_reply(ctdb, c, NULL, 0); - return; - } - - /* if there isn't a recovery lock child then create one */ - if (!ctdb->recovery_handle) { - ctdb->recovery_handle = ctdb_recovery_lock(ctdb); - CTDB_NO_MEMORY_VOID(ctdb, ctdb->recovery_handle); + uint32_t recmode = *(uint32_t *)indata.dptr; + if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) { + DEBUG(0,("Attempt to change recovery mode to %u when not frozen\n", + recmode)); + return -1; } - - /* add ourselves to list of waiters */ - w = talloc(ctdb->recovery_handle, struct ctdb_recovery_waiter); - CTDB_NO_MEMORY_VOID(ctdb, w); - w->ctdb = ctdb; - w->c = talloc_steal(w, c); - w->status = -1; - 
talloc_set_destructor(w, ctdb_recovery_waiter_destructor); - DLIST_ADD(ctdb->recovery_handle->waiters, w); + ctdb->recovery_mode = recmode; + return 0; } - - diff --git a/ctdb/direct/ctdb_recoverd.c b/ctdb/direct/ctdb_recoverd.c index 5ac4f635696..534507c0a6e 100644 --- a/ctdb/direct/ctdb_recoverd.c +++ b/ctdb/direct/ctdb_recoverd.c @@ -57,11 +57,27 @@ static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *no continue; } + if (rec_mode == CTDB_RECOVERY_ACTIVE) { + ret = ctdb_ctrl_freeze(ctdb, timeval_current_ofs(5, 0), nodemap->nodes[j].vnn); + if (ret != 0) { + DEBUG(0, (__location__ " Unable to freeze node %u\n", nodemap->nodes[j].vnn)); + return -1; + } + } + ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, rec_mode); if (ret != 0) { DEBUG(0, (__location__ " Unable to set recmode on node %u\n", nodemap->nodes[j].vnn)); return -1; } + + if (rec_mode == CTDB_RECOVERY_NORMAL) { + ret = ctdb_ctrl_thaw(ctdb, timeval_current_ofs(5, 0), nodemap->nodes[j].vnn); + if (ret != 0) { + DEBUG(0, (__location__ " Unable to thaw node %u\n", nodemap->nodes[j].vnn)); + return -1; + } + } } return 0; diff --git a/ctdb/include/ctdb.h b/ctdb/include/ctdb.h index 4be51948292..2508ea3e639 100644 --- a/ctdb/include/ctdb.h +++ b/ctdb/include/ctdb.h @@ -323,4 +323,7 @@ int ctdb_dump_db(struct ctdb_db_context *ctdb_db, FILE *f); */ int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *pid); +int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode); +int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode); + #endif diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index f1948e121ad..50500d9741a 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -218,12 +218,14 @@ struct ctdb_write_record { unsigned char blob[1]; }; +enum ctdb_freeze_mode {CTDB_FREEZE_NONE, 
CTDB_FREEZE_PENDING, CTDB_FREEZE_FROZEN}; /* main state of the ctdb daemon */ struct ctdb_context { struct event_context *ev; uint32_t recovery_mode; - struct ctdb_recovery_handle *recovery_handle; + enum ctdb_freeze_mode freeze_mode; + struct ctdb_freeze_handle *freeze_handle; struct ctdb_address address; const char *name; const char *db_directory; @@ -345,6 +347,8 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS, CTDB_CONTROL_GET_PID, CTDB_CONTROL_GET_RECMASTER, CTDB_CONTROL_SET_RECMASTER, + CTDB_CONTROL_FREEZE, + CTDB_CONTROL_THAW, }; @@ -783,8 +787,11 @@ int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata); int32_t ctdb_control_set_dmaster(struct ctdb_context *ctdb, TDB_DATA indata); int32_t ctdb_control_clear_db(struct ctdb_context *ctdb, TDB_DATA indata); -void ctdb_control_set_recmode(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA data); +int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb, TDB_DATA data); void ctdb_request_control_reply(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA *outdata, int32_t status); +int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply); +int32_t ctdb_control_thaw(struct ctdb_context *ctdb); + #endif diff --git a/ctdb/tools/ctdb_control.c b/ctdb/tools/ctdb_control.c index 10e958c1b07..6db4c8480d2 100644 --- a/ctdb/tools/ctdb_control.c +++ b/ctdb/tools/ctdb_control.c @@ -56,6 +56,8 @@ static void usage(void) " setrecmaster set recovery master\n" " attach attach a database\n" " getpid get the pid of a ctdb daemon\n" + " freeze freeze a node\n" + " thaw thaw a node\n" ); exit(1); } @@ -799,6 +801,79 @@ static int control_debug(struct ctdb_context *ctdb, int argc, const char **argv) } +/* + freeze a node + */ +static int control_freeze(struct ctdb_context *ctdb, int argc, const char **argv) +{ + int ret=0; + uint32_t vnn, i; + uint32_t *nodes; + uint32_t num_nodes; + + if (argc < 1) { + usage(); + } + + if (strcmp(argv[0], 
"all") != 0) { + vnn = strtoul(argv[0], NULL, 0); + ret = ctdb_ctrl_freeze(ctdb, timeval_current_ofs(5, 0), vnn); + if (ret != 0) { + printf("Unable to freeze node %u\n", vnn); + } + return 0; + } + + nodes = ctdb_get_connected_nodes(ctdb, timeval_current_ofs(1, 0), ctdb, &num_nodes); + CTDB_NO_MEMORY(ctdb, nodes); + for (i=0;i