From: Ronnie Sahlberg Date: Thu, 3 May 2007 22:30:18 +0000 (+1000) Subject: start working on a recovery daemon X-Git-Tag: tevent-0.9.20~348^2~2787^2~3 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ebc478749b3b940d9aff836762492800351f59b1;p=thirdparty%2Fsamba.git start working on a recovery daemon change ctdb_control so it takes a timeval pointer as argument. this is the timeout. if the node has not responded within the timeout ctdb_control will return an error instead of hanging. if the timeval pointer is NULL then the call will block indefinitely if there is no response. this is used for now in the createdb control but all the helpers ctdb_ctrl_* should probably be updated to take a timeout parameter as well. (This used to be ctdb commit 1fe64b04869b17dbf123851b0fe09df8d28a6211) --- diff --git a/ctdb/Makefile.in b/ctdb/Makefile.in index 154dc9abe8e..20410a8944c 100644 --- a/ctdb/Makefile.in +++ b/ctdb/Makefile.in @@ -40,7 +40,7 @@ CTDB_OBJ = $(CTDB_COMMON_OBJ) $(CTDB_TCP_OBJ) $(POPT_OBJ) OBJS = @TDB_OBJ@ @TALLOC_OBJ@ @LIBREPLACEOBJ@ @INFINIBAND_WRAPPER_OBJ@ $(EXTRA_OBJ) $(EVENTS_OBJ) $(CTDB_OBJ) $(UTIL_OBJ) -BINS = bin/ctdbd bin/ctdbd_test bin/ctdb_test bin/ctdb_bench bin/ctdb_messaging bin/ctdb_fetch bin/ctdb_fetch1 bin/lockwait bin/ctdb_control bin/ctdb_dump @INFINIBAND_BINS@ +BINS = bin/ctdbd bin/ctdbd_test bin/ctdb_test bin/ctdb_bench bin/ctdb_messaging bin/ctdb_fetch bin/ctdb_fetch1 bin/lockwait bin/ctdb_control bin/ctdb_dump bin/recoverd @INFINIBAND_BINS@ DIRS = lib bin @@ -75,6 +75,10 @@ bin/ctdb_dump: $(OBJS) tools/ctdb_dump.o @echo Linking $@ @$(CC) $(CFLAGS) -o $@ tools/ctdb_dump.o $(OBJS) $(LIB_FLAGS) +bin/recoverd: $(OBJS) direct/recoverd.o + @echo Linking $@ + @$(CC) $(CFLAGS) -o $@ direct/recoverd.o $(OBJS) $(LIB_FLAGS) + bin/ctdbd_test: $(OBJS) direct/ctdbd_test.o @echo Linking $@ @$(CC) $(CFLAGS) -o $@ direct/ctdbd_test.o diff --git a/ctdb/common/ctdb_client.c b/ctdb/common/ctdb_client.c index 297db8bc7af..c2916d24cbd 100644 --- 
a/ctdb/common/ctdb_client.c +++ b/ctdb/common/ctdb_client.c @@ -644,17 +644,30 @@ static void ctdb_client_reply_control(struct ctdb_context *ctdb, } +/* time out handler for ctdb_control */ +static void timeout_func(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private_data) +{ + uint32_t *timed_out = (uint32_t *)private_data; + + *timed_out = 1; +} + /* send a ctdb control message + timeout specifies how long we should wait for a reply. + if timeout is NULL we wait indefinitely */ int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid, uint32_t opcode, uint32_t flags, TDB_DATA data, - TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status) + TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status, + struct timeval *timeout) { struct ctdb_client_control_state *state; struct ctdb_req_control *c; size_t len; int ret; + uint32_t timed_out; /* if the domain socket is not yet open, open it */ if (ctdb->daemon.sd==-1) { @@ -695,10 +708,19 @@ int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid, } /* semi-async operation */ - while (state->state == CTDB_CALL_WAIT) { + timed_out = 0; + if (timeout) { + event_add_timed(ctdb->ev, mem_ctx, timeval_current_ofs(1, 0), timeout_func, &timed_out); + } + while ((state->state == CTDB_CALL_WAIT) + && (timed_out == 0) ){ event_loop_once(ctdb->ev); } - + if (timed_out) { + talloc_free(state); + return -1; + } + if (outdata) { *outdata = state->outdata; outdata->dptr = talloc_memdup(mem_ctx, outdata->dptr, outdata->dsize); @@ -727,7 +749,7 @@ int ctdb_ctrl_process_exists(struct ctdb_context *ctdb, uint32_t destnode, pid_t ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_PROCESS_EXISTS, 0, data, - NULL, NULL, &status); + NULL, NULL, &status, NULL); if (ret != 0) { DEBUG(0,(__location__ " ctdb_control for process_exists failed\n")); return -1; @@ -748,7 +770,7 @@ int ctdb_ctrl_status(struct ctdb_context *ctdb, uint32_t destnode, struct ctdb_s 
ZERO_STRUCT(data); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_STATUS, 0, data, - ctdb, &data, &res); + ctdb, &data, &res, NULL); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for status failed\n")); return -1; @@ -778,7 +800,7 @@ int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX ZERO_STRUCT(data); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GETVNNMAP, 0, data, - ctdb, &outdata, &res); + ctdb, &outdata, &res, NULL); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for getvnnmap failed\n")); return -1; @@ -805,7 +827,7 @@ int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t ZERO_STRUCT(data); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_RECMODE, 0, data, - ctdb, &outdata, &res); + ctdb, &outdata, &res, NULL); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for getrecmode failed\n")); return -1; @@ -831,7 +853,7 @@ int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_RECMODE, 0, data, - ctdb, &outdata, &res); + ctdb, &outdata, &res, NULL); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for getrecmode failed\n")); return -1; @@ -852,7 +874,7 @@ int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX ZERO_STRUCT(data); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DBMAP, 0, data, - ctdb, &outdata, &res); + ctdb, &outdata, &res, NULL); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for getdbmap failed\n")); return -1; @@ -881,7 +903,7 @@ int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, uint32_t destnode, ZERO_STRUCT(data); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_NODEMAP, 0, data, - ctdb, &outdata, &res); + ctdb, &outdata, &res, NULL); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for getnodes failed\n")); return -1; @@ -910,7 +932,7 @@ int 
ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SETVNNMAP, 0, data, - ctdb, &outdata, &res); + ctdb, &outdata, &res, NULL); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for setvnnmap failed\n")); return -1; @@ -937,7 +959,7 @@ int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_PULL_DB, 0, indata, - mem_ctx, &outdata, &res); + mem_ctx, &outdata, &res, NULL); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for pulldb failed\n")); return -1; @@ -1004,7 +1026,7 @@ int ctdb_ctrl_copydb(struct ctdb_context *ctdb, uint32_t sourcenode, uint32_t de ret = ctdb_control(ctdb, sourcenode, 0, CTDB_CONTROL_PULL_DB, 0, indata, - mem_ctx, &outdata, &res); + mem_ctx, &outdata, &res, NULL); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for pulldb failed\n")); return -1; @@ -1012,7 +1034,7 @@ int ctdb_ctrl_copydb(struct ctdb_context *ctdb, uint32_t sourcenode, uint32_t de ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_PUSH_DB, 0, outdata, - mem_ctx, NULL, &res); + mem_ctx, NULL, &res, NULL); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for pushdb failed\n")); return -1; @@ -1038,7 +1060,7 @@ int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CT ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_DMASTER, 0, indata, - mem_ctx, &outdata, &res); + mem_ctx, &outdata, &res, NULL); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for setdmaster failed\n")); return -1; @@ -1063,7 +1085,7 @@ int ctdb_ctrl_cleardb(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX * ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CLEAR_DB, 0, indata, - mem_ctx, &outdata, &res); + mem_ctx, &outdata, &res, NULL); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for cleardb failed\n")); return -1; @@ 
-1093,7 +1115,7 @@ int ctdb_ctrl_write_record(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_ ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_WRITE_RECORD, 0, indata, - mem_ctx, &outdata, &res); + mem_ctx, &outdata, &res, NULL); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for write record failed\n")); return -1; @@ -1113,7 +1135,7 @@ int ctdb_ctrl_ping(struct ctdb_context *ctdb, uint32_t destnode) ZERO_STRUCT(data); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_PING, 0, - data, NULL, NULL, &res); + data, NULL, NULL, &res, NULL); if (ret != 0) { return -1; } @@ -1132,7 +1154,7 @@ int ctdb_ctrl_get_config(struct ctdb_context *ctdb) ZERO_STRUCT(data); ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_CONFIG, 0, - data, ctdb, &data, &res); + data, ctdb, &data, &res, NULL); if (ret != 0 || res != 0) { return -1; } @@ -1167,7 +1189,7 @@ int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, uint32_t dbid, TALLOC_CTX *me ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_GETDBPATH, 0, data, - mem_ctx, &data, &res); + mem_ctx, &data, &res, NULL); if (ret != 0 || res != 0) { return -1; } @@ -1193,7 +1215,7 @@ int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, uint3 ZERO_STRUCT(data); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DEBUG, 0, data, - ctdb, &data, &res); + ctdb, &data, &res, NULL); if (ret != 0 || res != 0) { return -1; } @@ -1220,7 +1242,7 @@ int ctdb_ctrl_set_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, uint3 data.dsize = sizeof(level); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_DEBUG, 0, data, - NULL, NULL, &res); + NULL, NULL, &res, NULL); if (ret != 0 || res != 0) { return -1; } @@ -1273,7 +1295,7 @@ int ctdb_status_reset(struct ctdb_context *ctdb, uint32_t destnode) ZERO_STRUCT(data); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_STATUS_RESET, 0, data, - NULL, NULL, &res); + NULL, NULL, &res, NULL); if (ret != 0 || res != 0) { 
DEBUG(0,(__location__ " ctdb_control for reset status failed\n")); return -1; @@ -1304,7 +1326,7 @@ struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char *name) /* tell ctdb daemon to attach */ ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_DB_ATTACH, - 0, data, ctdb_db, &data, &res); + 0, data, ctdb_db, &data, &res, NULL); if (ret != 0 || res != 0 || data.dsize != sizeof(uint32_t)) { DEBUG(0,("Failed to attach to database '%s'\n", name)); talloc_free(ctdb_db); @@ -1352,7 +1374,7 @@ int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id) data.dsize = sizeof(c); ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_SET_CALL, 0, - data, NULL, NULL, &status); + data, NULL, NULL, &status, NULL); if (ret != 0 || status != 0) { DEBUG(0,("ctdb_set_call failed for call %u\n", id)); return -1; @@ -1387,7 +1409,7 @@ int ctdb_traverse_all(struct ctdb_db_context *ctdb_db, uint64_t srvid) data.dsize = sizeof(t); ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_TRAVERSE_START, 0, - data, NULL, NULL, &status); + data, NULL, NULL, &status, NULL); if (ret != 0 || status != 0) { DEBUG(0,("ctdb_traverse_all failed\n")); return -1; diff --git a/ctdb/direct/recoverd.c b/ctdb/direct/recoverd.c new file mode 100644 index 00000000000..3c1b5f5dda9 --- /dev/null +++ b/ctdb/direct/recoverd.c @@ -0,0 +1,137 @@ +/* + ctdb recovery daemon + + Copyright (C) Ronnie Sahlberg 2007 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "includes.h" +#include "lib/events/events.h" +#include "system/filesys.h" +#include "popt.h" +#include "cmdline.h" +#include "../include/ctdb.h" +#include "../include/ctdb_private.h" + +static int timeout = 0; + +/* + show usage message + */ +static void usage(void) +{ + printf( + "Usage: recoverd\n" + ); + exit(1); +} + +void timeout_func(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private_data) +{ + timeout = 1; +} + + +void recoverd(struct ctdb_context *ctdb, struct event_context *ev) +{ + uint32_t vnn; + TALLOC_CTX *mem_ctx=NULL; + +again: + if (mem_ctx) { + talloc_free(mem_ctx); + mem_ctx = NULL; + } + mem_ctx = talloc_new(ctdb); + if (!mem_ctx) { + DEBUG(0,("Failed to create temporary context\n")); + exit(-1); + } + + + /* we only check for recovery once every second */ + timeout = 0; + event_add_timed(ctdb->ev, mem_ctx, timeval_current_ofs(1, 0), timeout_func, ctdb); + while (!timeout) { + event_loop_once(ev); + } + + + /* get our vnn number */ + vnn = ctdb_get_vnn(ctdb); +printf("our node number is :%d\n",vnn); + + /* get number of nodes */ + +} + +/* + main program +*/ +int main(int argc, const char *argv[]) +{ + struct ctdb_context *ctdb; + struct poptOption popt_options[] = { + POPT_AUTOHELP + POPT_CTDB_CMDLINE + POPT_TABLEEND + }; + int opt; + const char **extra_argv; + int extra_argc = 0; + int ret; + poptContext pc; + struct event_context *ev; + + pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST); + + while ((opt = poptGetNextOpt(pc)) != -1) { + switch (opt) { + default: + fprintf(stderr, "Invalid option %s: %s\n", + poptBadOption(pc, 0), poptStrerror(opt)); + exit(1); + } + } + + /* setup the remaining options for the main program to use */ + extra_argv = 
poptGetArgs(pc); + if (extra_argv) { + extra_argv++; + while (extra_argv[extra_argc]) extra_argc++; + } + +#if 0 + if (extra_argc < 1) { + usage(); + } +#endif + + ev = event_context_init(NULL); + + /* initialise ctdb */ + ctdb = ctdb_cmdline_client(ev); + if (ctdb == NULL) { + printf("Failed to init ctdb\n"); + exit(1); + } + + + recoverd(ctdb, ev); + + return ret; +} diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 125f71d40ff..22bcedf197c 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -642,7 +642,8 @@ int ctdb_daemon_set_call(struct ctdb_context *ctdb, uint32_t db_id, int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid, uint32_t opcode, uint32_t flags, TDB_DATA data, - TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status); + TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status, + struct timeval *timeout); diff --git a/ctdb/tools/ctdb_control.c b/ctdb/tools/ctdb_control.c index c4a93dd7f02..2def6eaef7e 100644 --- a/ctdb/tools/ctdb_control.c +++ b/ctdb/tools/ctdb_control.c @@ -762,6 +762,7 @@ static int control_createdb(struct ctdb_context *ctdb, int argc, const char **ar int ret; int32_t res; TDB_DATA data; + struct timeval timeout; if (argc < 2) { usage(); @@ -773,8 +774,10 @@ static int control_createdb(struct ctdb_context *ctdb, int argc, const char **ar /* tell ctdb daemon to attach */ data.dptr = discard_const(dbname); data.dsize = strlen(dbname)+1; + timeout = timeval_current_ofs(1, 0); ret = ctdb_control(ctdb, vnn, 0, CTDB_CONTROL_DB_ATTACH, - 0, data, ctdb, &data, &res); + 0, data, ctdb, &data, &res, + &timeout); if (ret != 0 || res != 0 || data.dsize != sizeof(uint32_t)) { DEBUG(0,("Failed to attach to database '%s'\n", dbname)); return -1;