From: Martin Schwenke
Date: Tue, 19 Jan 2016 09:33:58 +0000 (+1100)
Subject: ctdb-recovery: Reimplement ctdb_recovery_lock() using ctdb_cluster_mutex()
X-Git-Tag: talloc-2.1.7~107
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=64d557200ed63e1ff21cd0078e86957b689eff7e;p=thirdparty%2Fsamba.git

ctdb-recovery: Reimplement ctdb_recovery_lock() using ctdb_cluster_mutex()

Replace the file descriptor for the recovery lock in the CTDB context
with the cluster mutex handle, where non-NULL means locked.

Attempting to take the recovery lock is now asynchronous and no longer
blocks the recovery daemon.

Signed-off-by: Martin Schwenke
Reviewed-by: Amitay Isaacs
---

Review notes (not part of the commit message):

 * ctdb_recovery_lock() is declared bool but returned -1 when
   ctdb_cluster_mutex() failed; -1 converts to true, so the failure was
   reported as "lock taken".  It now returns false.
 * hold_reclock_handler() frees the handle on contention/error, but
   ctdb_recovery_lock() then wrote h->private_data.  The handle is now
   only touched when the lock was actually taken.
 * Because of these fixes the ctdb_recover.c hunk differs from the
   commit referenced in X-Git-Url (hunk header count and post-image
   blob id).

diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index 41b9f4f6ce9..edd451b2c1f 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -280,6 +280,8 @@ struct ctdb_daemon_data {
 	}
 
 
+struct ctdb_cluster_mutex_handle;
+
 enum ctdb_freeze_mode {CTDB_FREEZE_NONE, CTDB_FREEZE_PENDING, CTDB_FREEZE_FROZEN};
 
 #define NUM_DB_PRIORITIES 3
@@ -309,7 +311,7 @@ struct ctdb_context {
 	uint64_t max_persistent_check_errors;
 	const char *transport;
 	char *recovery_lock_file;
-	int recovery_lock_fd;
+	struct ctdb_cluster_mutex_handle *recovery_lock_handle;
 	uint32_t pnn; /* our own pnn */
 	uint32_t num_nodes;
 	uint32_t num_connected;
diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c
index 8314388930c..f58f9a6da2a 100644
--- a/ctdb/server/ctdb_recover.c
+++ b/ctdb/server/ctdb_recover.c
@@ -1137,60 +1137,89 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
 
 bool ctdb_recovery_have_lock(struct ctdb_context *ctdb)
 {
-	return ctdb->recovery_lock_fd != -1;
+	return (ctdb->recovery_lock_handle != NULL);
 }
 
-/*
-  try and get the recovery lock in shared storage - should only work
-  on the recovery master recovery daemon. Anywhere else is a bug
- */
-bool ctdb_recovery_lock(struct ctdb_context *ctdb)
+/* Result of an attempt to take the recovery lock, filled in by the handler */
+struct hold_reclock_state {
+	bool done;	/* set once the handler has run */
+	char status;	/* status passed to the handler, '0' on success */
+};
+
+/*
+ * Cluster mutex callback: on success ('0') keep the handle in the CTDB
+ * context, otherwise log the failure and free the handle
+ */
+static void hold_reclock_handler(struct ctdb_context *ctdb,
+				 char status,
+				 double latency,
+				 struct ctdb_cluster_mutex_handle *h,
+				 void *private_data)
 {
-	struct flock lock;
+	struct hold_reclock_state *s =
+		(struct hold_reclock_state *) private_data;
+
+	switch (status) {
+	case '0':
+		ctdb->recovery_lock_handle = h;
+		break;
 
-	ctdb->recovery_lock_fd = open(ctdb->recovery_lock_file,
-				      O_RDWR|O_CREAT, 0600);
-	if (ctdb->recovery_lock_fd == -1) {
+	case '1':
 		DEBUG(DEBUG_ERR,
-		      ("ctdb_recovery_lock: Unable to open %s - (%s)\n",
-		       ctdb->recovery_lock_file, strerror(errno)));
-		return false;
+		      ("Unable to take recovery lock - contention\n"));
+		talloc_free(h);
+		break;
+
+	default:
+		DEBUG(DEBUG_ERR, ("ERROR: when taking recovery lock\n"));
+		talloc_free(h);
 	}
 
-	set_close_on_exec(ctdb->recovery_lock_fd);
-
-	lock.l_type = F_WRLCK;
-	lock.l_whence = SEEK_SET;
-	lock.l_start = 0;
-	lock.l_len = 1;
-	lock.l_pid = 0;
-
-	if (fcntl(ctdb->recovery_lock_fd, F_SETLK, &lock) != 0) {
-		int saved_errno = errno;
-		close(ctdb->recovery_lock_fd);
-		ctdb->recovery_lock_fd = -1;
-		/* Fail silently on these errors, since they indicate
-		 * lock contention, but log an error for any other
-		 * failure. */
-		if (saved_errno != EACCES &&
-		    saved_errno != EAGAIN) {
-			DEBUG(DEBUG_ERR,("ctdb_recovery_lock: Failed to get "
-					 "recovery lock on '%s' - (%s)\n",
-					 ctdb->recovery_lock_file,
-					 strerror(saved_errno)));
-		}
-		return false;
+	s->done = true;
+	s->status = status;
+}
+
+/*
+ * Try to take the recovery lock, running the event loop until the
+ * cluster mutex handler has reported a result.  Returns true if the
+ * lock was taken.
+ */
+bool ctdb_recovery_lock(struct ctdb_context *ctdb)
+{
+	struct ctdb_cluster_mutex_handle *h;
+	struct hold_reclock_state s = {
+		.done = false,
+		.status = '0',
+	};
+
+	h = ctdb_cluster_mutex(ctdb, 0);
+	if (h == NULL) {
+		return false;
 	}
 
-	return true;
+	h->handler = hold_reclock_handler;
+	h->private_data = &s;
+
+	while (!s.done) {
+		tevent_loop_once(ctdb->ev);
+	}
+
+	if (s.status != '0') {
+		/* The handler has already freed h, so it must not be used */
+		return false;
+	}
+
+	/* s is on the stack: detach it from the handle before returning */
+	h->private_data = NULL;
+
+	return true;
 }
 
 void ctdb_recovery_unlock(struct ctdb_context *ctdb)
 {
-	if (ctdb->recovery_lock_fd != -1) {
+	if (ctdb->recovery_lock_handle != NULL) {
 		DEBUG(DEBUG_NOTICE, ("Releasing recovery lock\n"));
-		close(ctdb->recovery_lock_fd);
-		ctdb->recovery_lock_fd = -1;
+		TALLOC_FREE(ctdb->recovery_lock_handle);
 	}
 }
 
diff --git a/ctdb/server/ctdbd.c b/ctdb/server/ctdbd.c
index 5fc1db6318d..5d6b4be36cd 100644
--- a/ctdb/server/ctdbd.c
+++ b/ctdb/server/ctdbd.c
@@ -195,7 +195,7 @@ int main(int argc, const char *argv[])
 	ctdb->recovery_mode = CTDB_RECOVERY_NORMAL;
 	ctdb->recovery_master = (uint32_t)-1;
 	ctdb->upcalls = &ctdb_upcalls;
-	ctdb->recovery_lock_fd = -1;
+	ctdb->recovery_lock_handle = NULL;
 
 	TALLOC_FREE(ctdb->idr);
 	ret = reqid_init(ctdb, 0, &ctdb->idr);;