From: Martin Schwenke Date: Tue, 5 Apr 2016 05:26:22 +0000 (+1000) Subject: ctdb-recovery: Consistency check reclock in start recovery control X-Git-Tag: tdb-1.3.10~937 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=091d4d2dbbb87a620522e0b0f0ce9393cb21c466;p=thirdparty%2Fsamba.git ctdb-recovery: Consistency check reclock in start recovery control If the recovery lock setting is not consistent with that of the recovery master then abort. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs --- diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index 804dcecb1e1..bd6607ecf69 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -1124,40 +1124,123 @@ static void ctdb_start_recovery_callback(struct ctdb_context *ctdb, int status, talloc_free(state); } -/* - run the startrecovery eventscript - */ -int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb, - struct ctdb_req_control_old *c, - bool *async_reply) +static void run_start_recovery_event(struct ctdb_context *ctdb, + struct recovery_callback_state *state) { int ret; - struct recovery_callback_state *state; - - DEBUG(DEBUG_NOTICE,(__location__ " startrecovery eventscript has been invoked\n")); - gettimeofday(&ctdb->last_recovery_started, NULL); - - state = talloc(ctdb, struct recovery_callback_state); - CTDB_NO_MEMORY(ctdb, state); - - state->c = talloc_steal(state, c); ctdb_disable_monitoring(ctdb); ret = ctdb_event_script_callback(ctdb, state, - ctdb_start_recovery_callback, + ctdb_start_recovery_callback, state, CTDB_EVENT_START_RECOVERY, "%s", ""); if (ret != 0) { - DEBUG(DEBUG_ERR,(__location__ " Failed to start recovery\n")); + DEBUG(DEBUG_ERR,("Unable to run startrecovery event\n")); + ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL); talloc_free(state); - return -1; + return; + } + + return; +} + +static bool reclock_strings_equal(const char *a, const char *b) +{ + return (a == NULL && b == NULL) || + (a != NULL && b != NULL && strcmp(a, b) == 0); +} + +static void start_recovery_reclock_callback(struct ctdb_context *ctdb, + int32_t status, + TDB_DATA data, + const char *errormsg, + void *private_data) +{ + struct recovery_callback_state *state = talloc_get_type_abort( + private_data, struct recovery_callback_state); + const char *local = ctdb->recovery_lock_file; + const char *remote = NULL; + + if (status != 0) { + DEBUG(DEBUG_ERR, (__location__ " GET_RECLOCK failed\n")); + ctdb_request_control_reply(ctdb, state->c, NULL, + status, errormsg); + talloc_free(state); + return; + } + + /* Check reclock consistency */ + if (data.dsize > 0) { + /* Ensure NUL-termination */ + data.dptr[data.dsize-1] = '\0'; + remote = (const char *)data.dptr; + } + if (! reclock_strings_equal(local, remote)) { + /* Inconsistent */ + ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL); + DEBUG(DEBUG_ERR, + ("Recovery lock configuration inconsistent: " + "recmaster has %s, this node has %s, shutting down\n", + remote == NULL ? "NULL" : remote, + local == NULL ? "NULL" : local)); + talloc_free(state); + ctdb_shutdown_sequence(ctdb, 1); + } + DEBUG(DEBUG_INFO, + ("Recovery lock consistency check successful\n")); + + run_start_recovery_event(ctdb, state); +} + +/* Check recovery lock consistency and run eventscripts for the + * "startrecovery" event */ +int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb, + struct ctdb_req_control_old *c, + bool *async_reply) +{ + int ret; + struct recovery_callback_state *state; + uint32_t recmaster = c->hdr.srcnode; + + DEBUG(DEBUG_NOTICE, ("Running startrecovery event\n")); + gettimeofday(&ctdb->last_recovery_started, NULL); + + state = talloc(ctdb, struct recovery_callback_state); + CTDB_NO_MEMORY(ctdb, state); + + state->c = c; + + /* Although the recovery master sent this node a start + * recovery control, this node might still think the recovery + * master is disconnected. In this case defer the recovery + * lock consistency check. */ + if (ctdb->nodes[recmaster]->flags & NODE_FLAGS_DISCONNECTED) { + run_start_recovery_event(ctdb, state); + } else { + /* Ask the recovery master about its reclock setting */ + ret = ctdb_daemon_send_control(ctdb, + recmaster, + 0, + CTDB_CONTROL_GET_RECLOCK_FILE, + 0, 0, + tdb_null, + start_recovery_reclock_callback, + state); + + if (ret != 0) { + DEBUG(DEBUG_ERR, (__location__ " GET_RECLOCK failed\n")); + talloc_free(state); + return -1; + } } /* tell the control that we will be reply asynchronously */ + state->c = talloc_steal(state, c); *async_reply = true; + return 0; }