CTDB_CTRL_FLAG_NOREPLY,
tdb_null, NULL, NULL);
- ret = ctdb_event_script_callback(ctdb, ctdb,
+ ret = ctdb_event_script_callback(ctdb, timeval_zero(), ctdb,
ctdb_start_transport, NULL, "startup");
if (ret != 0) {
DEBUG(0,("Failed startup event script\n"));
see if any nodes are dead
*/
static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_event *te,
- struct timeval t, void *private_data)
+ struct timeval t, void *private_data)
{
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
int i;
if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED) {
- event_add_timed(ctdb->ev, ctdb,
+ event_add_timed(ctdb->ev, ctdb->monitor_context,
timeval_current_ofs(ctdb->tunable.keepalive_interval, 0),
ctdb_check_for_dead_nodes, ctdb);
return;
node->tx_cnt = 0;
}
- event_add_timed(ctdb->ev, ctdb,
+ event_add_timed(ctdb->ev, ctdb->monitor_context,
timeval_current_ofs(ctdb->tunable.keepalive_interval, 0),
ctdb_check_for_dead_nodes, ctdb);
}
TDB_DATA data;
struct ctdb_node_flag_change c;
- event_add_timed(ctdb->ev, ctdb,
+ event_add_timed(ctdb->ev, ctdb->monitor_context,
timeval_current_ofs(ctdb->tunable.monitor_interval, 0),
ctdb_check_health, ctdb);
int ret;
if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED) {
- event_add_timed(ctdb->ev, ctdb,
+ event_add_timed(ctdb->ev, ctdb->monitor_context,
timeval_current_ofs(ctdb->tunable.monitor_interval, 0),
ctdb_check_health, ctdb);
return;
}
- ret = ctdb_event_script_callback(ctdb, ctdb, ctdb_health_callback, ctdb, "monitor");
+ ret = ctdb_event_script_callback(ctdb,
+ timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+ ctdb->monitor_context, ctdb_health_callback, ctdb, "monitor");
if (ret != 0) {
DEBUG(0,("Unable to launch monitor event script\n"));
- event_add_timed(ctdb->ev, ctdb,
+ event_add_timed(ctdb->ev, ctdb->monitor_context,
timeval_current_ofs(ctdb->tunable.monitor_interval, 0),
ctdb_check_health, ctdb);
}
}
+/* stop any monitoring
+
+   The periodic monitoring timers and the "monitor" event script in this
+   change are all registered with ctdb->monitor_context as their talloc
+   parent, so freeing that context releases every one of them in a single
+   step (talloc frees children together with their parent).
+
+   A fresh, empty context parented to ctdb is installed immediately
+   afterwards so that later event_add_timed() calls still have a valid
+   parent.  If that allocation fails the result is handed to
+   CTDB_NO_MEMORY_FATAL.
+ */
+void ctdb_stop_monitoring(struct ctdb_context *ctdb)
+{
+ talloc_free(ctdb->monitor_context);
+ ctdb->monitor_context = talloc_new(ctdb);
+ CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor_context);
+}
/*
start watching for nodes that might be dead
*/
-int ctdb_start_monitoring(struct ctdb_context *ctdb)
+void ctdb_start_monitoring(struct ctdb_context *ctdb) /* returns nothing: a failed timer allocation goes to CTDB_NO_MEMORY_FATAL below */
{
- event_add_timed(ctdb->ev, ctdb,
- timeval_current_ofs(ctdb->tunable.keepalive_interval, 0),
- ctdb_check_for_dead_nodes, ctdb);
- event_add_timed(ctdb->ev, ctdb,
- timeval_current_ofs(ctdb->tunable.monitor_interval, 0),
- ctdb_check_health, ctdb);
- return 0;
+ struct timed_event *te;
+ /* Begin from an empty monitor_context: anything already registered on it
+    is released first, then both periodic checks are armed on the new one. */
+ ctdb_stop_monitoring(ctdb);
+
+ te = event_add_timed(ctdb->ev, ctdb->monitor_context,
+ timeval_current_ofs(ctdb->tunable.keepalive_interval, 0),
+ ctdb_check_for_dead_nodes, ctdb); /* dead-node check, offset taken from tunable keepalive_interval */
+ CTDB_NO_MEMORY_FATAL(ctdb, te);
+
+ te = event_add_timed(ctdb->ev, ctdb->monitor_context,
+ timeval_current_ofs(ctdb->tunable.monitor_interval, 0),
+ ctdb_check_health, ctdb); /* health check, offset taken from tunable monitor_interval */
+ CTDB_NO_MEMORY_FATAL(ctdb, te);
}
{
struct ctdb_set_recmode_state *state = talloc_get_type(p, struct ctdb_set_recmode_state);
+ ctdb_start_monitoring(ctdb);
+
if (status == 0) {
ctdb->recovery_mode = state->recmode;
} else {
state->c = talloc_steal(state, c);
state->recmode = recmode;
+
+ ctdb_stop_monitoring(ctdb);
+
/* call the events script to tell all subsystems that we have recovered */
- ret = ctdb_event_script_callback(ctdb, state,
+ ret = ctdb_event_script_callback(ctdb,
+ timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+ state,
ctdb_recovered_callback,
state, "recovered");
if (ret != 0) {
uint32_t default_v;
size_t offset;
} tunable_map[] = {
- { "MaxRedirectCount", 3, offsetof(struct ctdb_tunable, max_redirect_count) },
- { "SeqnumFrequency", 1, offsetof(struct ctdb_tunable, seqnum_frequency) },
- { "ControlTimeout", 60, offsetof(struct ctdb_tunable, control_timeout) },
- { "TraverseTimeout", 20, offsetof(struct ctdb_tunable, traverse_timeout) },
- { "KeepaliveInterval", 2, offsetof(struct ctdb_tunable, keepalive_interval) },
- { "KeepaliveLimit", 3, offsetof(struct ctdb_tunable, keepalive_limit) },
- { "MaxLACount", 7, offsetof(struct ctdb_tunable, max_lacount) },
- { "RecoverTimeout", 5, offsetof(struct ctdb_tunable, recover_timeout) },
- { "RecoverInterval", 1, offsetof(struct ctdb_tunable, recover_interval) },
- { "ElectionTimeout", 3, offsetof(struct ctdb_tunable, election_timeout) },
- { "TakeoverTimeout", 5, offsetof(struct ctdb_tunable, takeover_timeout) },
- { "MonitorInterval", 15, offsetof(struct ctdb_tunable, monitor_interval) },
+ { "MaxRedirectCount", 3, offsetof(struct ctdb_tunable, max_redirect_count) },
+ { "SeqnumFrequency", 1, offsetof(struct ctdb_tunable, seqnum_frequency) },
+ { "ControlTimeout", 60, offsetof(struct ctdb_tunable, control_timeout) },
+ { "TraverseTimeout", 20, offsetof(struct ctdb_tunable, traverse_timeout) },
+ { "KeepaliveInterval", 2, offsetof(struct ctdb_tunable, keepalive_interval) },
+ { "KeepaliveLimit", 3, offsetof(struct ctdb_tunable, keepalive_limit) },
+ { "MaxLACount", 7, offsetof(struct ctdb_tunable, max_lacount) },
+ { "RecoverTimeout", 5, offsetof(struct ctdb_tunable, recover_timeout) },
+ { "RecoverInterval", 1, offsetof(struct ctdb_tunable, recover_interval) },
+ { "ElectionTimeout", 3, offsetof(struct ctdb_tunable, election_timeout) },
+ { "TakeoverTimeout", 5, offsetof(struct ctdb_tunable, takeover_timeout) },
+ { "MonitorInterval", 15, offsetof(struct ctdb_tunable, monitor_interval) },
+ { "EventScriptTimeout", 20, offsetof(struct ctdb_tunable, script_timeout) },
};
/*
uint32_t election_timeout;
uint32_t takeover_timeout;
uint32_t monitor_interval;
+ uint32_t script_timeout;
};
/*
struct event_context *ev;
uint32_t recovery_mode;
uint32_t monitoring_mode;
+ TALLOC_CTX *monitor_context;
struct ctdb_tunable tunable;
enum ctdb_freeze_mode freeze_mode;
struct ctdb_freeze_handle *freeze_handle;
uint32_t ctdb_get_num_enabled_nodes(struct ctdb_context *ctdb);
-int ctdb_start_monitoring(struct ctdb_context *ctdb);
+void ctdb_stop_monitoring(struct ctdb_context *ctdb);
+void ctdb_start_monitoring(struct ctdb_context *ctdb);
void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node);
void ctdb_takeover_client_destructor_hook(struct ctdb_client *client);
int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
int ctdb_event_script_callback(struct ctdb_context *ctdb,
+ struct timeval timeout, /* a zero timeval means no timeout timer is armed for the script */
TALLOC_CTX *mem_ctx,
void (*callback)(struct ctdb_context *, int, void *),
void *private_data,
- const char *fmt, ...) PRINTF_ATTRIBUTE(5,6);
+ const char *fmt, ...) PRINTF_ATTRIBUTE(6,7); /* fmt is now the 6th parameter because of the added timeout argument */
void ctdb_release_all_ips(struct ctdb_context *ctdb);
void set_nonblocking(int fd);
char *ip = inet_ntoa(state->sin->sin_addr);
struct ctdb_tcp_list *tcp;
+ ctdb_start_monitoring(ctdb);
+
if (status != 0) {
DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n",
ip, ctdb->takeover.interface));
ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits,
ctdb->takeover.interface));
- ret = ctdb_event_script_callback(ctdb, state, takeover_ip_callback, state,
+ ctdb_stop_monitoring(ctdb);
+
+ ret = ctdb_event_script_callback(ctdb,
+ timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+ state, takeover_ip_callback, state,
"takeip %s %s %u",
ctdb->takeover.interface,
ip,
TDB_DATA data;
struct ctdb_tcp_list *tcp;
+ ctdb_start_monitoring(ctdb);
+
/* send a message to all clients of this node telling them
that the cluster has been reconfigured and they should
release any sockets on this IP */
CTDB_NO_MEMORY(ctdb, state->sin);
*state->sin = pip->sin;
- ret = ctdb_event_script_callback(ctdb, state, release_ip_callback, state,
+ ctdb_stop_monitoring(ctdb);
+
+ ret = ctdb_event_script_callback(ctdb,
+ timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+ state, release_ip_callback, state,
"releaseip %s %s %u",
ctdb->takeover.interface,
ip,
callback(ctdb, status, private_data);
}
+
+/* called when child times out
+
+   Timed-event handler that ctdb_event_script_callback() arms when it is
+   given a non-zero timeout.  p is the struct ctdb_event_script_state of the
+   script being waited for; ev, te and t are not used.
+
+   The state is freed and the callback stored in it is then invoked with
+   status -1.
+   NOTE(review): stopping the child itself is presumably done by the
+   state's talloc destructor, which is not visible here -- confirm.
+ */
+static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te,
+ struct timeval t, void *p)
+{
+ struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
+ void (*callback)(struct ctdb_context *, int, void *) = state->callback; /* copied out now: state is freed before the callback runs */
+ void *private_data = state->private_data;
+ struct ctdb_context *ctdb = state->ctdb;
+
+ DEBUG(0,("event script timed out\n"));
+ talloc_free(state); /* state must not be dereferenced past this point */
+ callback(ctdb, -1, private_data); /* -1 is the status reported for a timeout */
+}
+
/*
destroy a running event script
*/
finished
*/
int ctdb_event_script_callback(struct ctdb_context *ctdb,
+ struct timeval timeout,
TALLOC_CTX *mem_ctx,
void (*callback)(struct ctdb_context *, int, void *),
void *private_data,
event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
ctdb_event_script_handler, state);
+ if (!timeval_is_zero(&timeout)) {
+ event_add_timed(ctdb->ev, state, timeout, ctdb_event_script_timeout, state);
+ }
+
return 0;
}