git.ipfire.org Git - thirdparty/samba.git/commitdiff
added timeouts in all event scripts
author: Andrew Tridgell <tridge@samba.org>
Wed, 6 Jun 2007 03:45:12 +0000 (13:45 +1000)
committer: Andrew Tridgell <tridge@samba.org>
Wed, 6 Jun 2007 03:45:12 +0000 (13:45 +1000)
(This used to be ctdb commit d986c91a607ed7c7d4869ea786b5cdf80e7862f1)

ctdb/common/ctdb_daemon.c
ctdb/common/ctdb_monitor.c
ctdb/common/ctdb_recover.c
ctdb/common/ctdb_tunables.c
ctdb/include/ctdb_private.h
ctdb/takeover/ctdb_takeover.c
ctdb/takeover/system.c

index 36eebbb9ee3f8d584cb76038dc3e99b7e0c73d1a..95935b8441b2bed29d7a55594bc835f51701e214 100644 (file)
@@ -87,7 +87,7 @@ static void ctdb_main_loop(struct ctdb_context *ctdb)
                                 CTDB_CTRL_FLAG_NOREPLY,
                                 tdb_null, NULL, NULL);
 
-       ret = ctdb_event_script_callback(ctdb, ctdb, 
+       ret = ctdb_event_script_callback(ctdb, timeval_zero(), ctdb, 
                                         ctdb_start_transport, NULL, "startup");
        if (ret != 0) {
                DEBUG(0,("Failed startup event script\n"));
index 2723b09d24ae9daa08f0c88d048656f855651c87..de5effb6980b9fe125590ceb72f0e946d108b375 100644 (file)
   see if any nodes are dead
  */
 static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_event *te, 
-                          struct timeval t, void *private_data)
+                                     struct timeval t, void *private_data)
 {
        struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
        int i;
 
        if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED) {
-               event_add_timed(ctdb->ev, ctdb, 
+               event_add_timed(ctdb->ev, ctdb->monitor_context,
                        timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), 
                        ctdb_check_for_dead_nodes, ctdb);
                return;
@@ -83,7 +83,7 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve
                node->tx_cnt = 0;
        }
        
-       event_add_timed(ctdb->ev, ctdb, 
+       event_add_timed(ctdb->ev, ctdb->monitor_context,
                        timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), 
                        ctdb_check_for_dead_nodes, ctdb);
 }
@@ -100,7 +100,7 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
        TDB_DATA data;
        struct ctdb_node_flag_change c;
 
-       event_add_timed(ctdb->ev, ctdb, 
+       event_add_timed(ctdb->ev, ctdb->monitor_context,
                        timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
                        ctdb_check_health, ctdb);
 
@@ -136,32 +136,47 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
        int ret;
 
        if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED) {
-               event_add_timed(ctdb->ev, ctdb,
+               event_add_timed(ctdb->ev, ctdb->monitor_context,
                                timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
                                ctdb_check_health, ctdb);
                return;
        }
        
-       ret = ctdb_event_script_callback(ctdb, ctdb, ctdb_health_callback, ctdb, "monitor");
+       ret = ctdb_event_script_callback(ctdb, 
+                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+                                        ctdb->monitor_context, ctdb_health_callback, ctdb, "monitor");
        if (ret != 0) {
                DEBUG(0,("Unable to launch monitor event script\n"));
-               event_add_timed(ctdb->ev, ctdb, 
+               event_add_timed(ctdb->ev, ctdb->monitor_context,
                                timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
                                ctdb_check_health, ctdb);
        }       
 }
 
+/* stop any monitoring */
+void ctdb_stop_monitoring(struct ctdb_context *ctdb)
+{
+       talloc_free(ctdb->monitor_context);
+       ctdb->monitor_context = talloc_new(ctdb);
+       CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor_context);
+}
 
 /*
   start watching for nodes that might be dead
  */
-int ctdb_start_monitoring(struct ctdb_context *ctdb)
+void ctdb_start_monitoring(struct ctdb_context *ctdb)
 {
-       event_add_timed(ctdb->ev, ctdb, 
-                       timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), 
-                       ctdb_check_for_dead_nodes, ctdb);
-       event_add_timed(ctdb->ev, ctdb, 
-                       timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
-                       ctdb_check_health, ctdb);
-       return 0;
+       struct timed_event *te;
+
+       ctdb_stop_monitoring(ctdb);
+
+       te = event_add_timed(ctdb->ev, ctdb->monitor_context,
+                            timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), 
+                            ctdb_check_for_dead_nodes, ctdb);
+       CTDB_NO_MEMORY_FATAL(ctdb, te);
+
+       te = event_add_timed(ctdb->ev, ctdb->monitor_context,
+                            timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
+                            ctdb_check_health, ctdb);
+       CTDB_NO_MEMORY_FATAL(ctdb, te);
 }
index a27c51263f78a856bb05685e5fc6f6d688df7f1a..a010d2e168545da9b1a85dc92c92b32f5ada405e 100644 (file)
@@ -404,6 +404,8 @@ static void ctdb_recovered_callback(struct ctdb_context *ctdb, int status, void
 {
        struct ctdb_set_recmode_state *state = talloc_get_type(p, struct ctdb_set_recmode_state);
 
+       ctdb_start_monitoring(ctdb);
+
        if (status == 0) {
                ctdb->recovery_mode = state->recmode;
        } else {
@@ -453,8 +455,13 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
 
        state->c = talloc_steal(state, c);
        state->recmode = recmode;
+       
+       ctdb_stop_monitoring(ctdb);
+
        /* call the events script to tell all subsystems that we have recovered */
-       ret = ctdb_event_script_callback(ctdb, state, 
+       ret = ctdb_event_script_callback(ctdb, 
+                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+                                        state, 
                                         ctdb_recovered_callback, 
                                         state, "recovered");
        if (ret != 0) {
index c799ce4ec4c40f2703d8358b54d38114567b01a8..09ba515abe29c86be70aacd4a42adb62c3605696 100644 (file)
@@ -25,18 +25,19 @@ static const struct {
        uint32_t default_v;
        size_t offset;  
 } tunable_map[] = {
-       { "MaxRedirectCount",  3,  offsetof(struct ctdb_tunable, max_redirect_count) },
-       { "SeqnumFrequency",   1,  offsetof(struct ctdb_tunable, seqnum_frequency) },
-       { "ControlTimeout",    60, offsetof(struct ctdb_tunable, control_timeout) },
-       { "TraverseTimeout",   20, offsetof(struct ctdb_tunable, traverse_timeout) },
-       { "KeepaliveInterval", 2,  offsetof(struct ctdb_tunable, keepalive_interval) },
-       { "KeepaliveLimit",    3,  offsetof(struct ctdb_tunable, keepalive_limit) },
-       { "MaxLACount",        7,  offsetof(struct ctdb_tunable, max_lacount) },
-       { "RecoverTimeout",    5,  offsetof(struct ctdb_tunable, recover_timeout) },
-       { "RecoverInterval",   1,  offsetof(struct ctdb_tunable, recover_interval) },
-       { "ElectionTimeout",   3,  offsetof(struct ctdb_tunable, election_timeout) },
-       { "TakeoverTimeout",   5,  offsetof(struct ctdb_tunable, takeover_timeout) },
-       { "MonitorInterval",  15,  offsetof(struct ctdb_tunable, monitor_interval) },
+       { "MaxRedirectCount",    3,  offsetof(struct ctdb_tunable, max_redirect_count) },
+       { "SeqnumFrequency",     1,  offsetof(struct ctdb_tunable, seqnum_frequency) },
+       { "ControlTimeout",     60, offsetof(struct ctdb_tunable, control_timeout) },
+       { "TraverseTimeout",    20, offsetof(struct ctdb_tunable, traverse_timeout) },
+       { "KeepaliveInterval",   2,  offsetof(struct ctdb_tunable, keepalive_interval) },
+       { "KeepaliveLimit",      3,  offsetof(struct ctdb_tunable, keepalive_limit) },
+       { "MaxLACount",          7,  offsetof(struct ctdb_tunable, max_lacount) },
+       { "RecoverTimeout",      5,  offsetof(struct ctdb_tunable, recover_timeout) },
+       { "RecoverInterval",     1,  offsetof(struct ctdb_tunable, recover_interval) },
+       { "ElectionTimeout",     3,  offsetof(struct ctdb_tunable, election_timeout) },
+       { "TakeoverTimeout",     5,  offsetof(struct ctdb_tunable, takeover_timeout) },
+       { "MonitorInterval",    15,  offsetof(struct ctdb_tunable, monitor_interval) },
+       { "EventScriptTimeout", 20,  offsetof(struct ctdb_tunable, script_timeout) },
 };
 
 /*
index 4af74b2cad0cd3949d851fa462bd24003591eefd..0e5e361d70ccbc04299d2dbfab15a97ef14df109 100644 (file)
@@ -52,6 +52,7 @@ struct ctdb_tunable {
        uint32_t election_timeout;
        uint32_t takeover_timeout;
        uint32_t monitor_interval;
+       uint32_t script_timeout;
 };
 
 /*
@@ -281,6 +282,7 @@ struct ctdb_context {
        struct event_context *ev;
        uint32_t recovery_mode;
        uint32_t monitoring_mode;
+       TALLOC_CTX *monitor_context;
        struct ctdb_tunable tunable;
        enum ctdb_freeze_mode freeze_mode;
        struct ctdb_freeze_handle *freeze_handle;
@@ -917,7 +919,8 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb);
 
 uint32_t ctdb_get_num_enabled_nodes(struct ctdb_context *ctdb);
 
-int ctdb_start_monitoring(struct ctdb_context *ctdb);
+void ctdb_stop_monitoring(struct ctdb_context *ctdb);
+void ctdb_start_monitoring(struct ctdb_context *ctdb);
 void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
 
 void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node);
@@ -983,10 +986,11 @@ int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn);
 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client);
 int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
 int ctdb_event_script_callback(struct ctdb_context *ctdb, 
+                              struct timeval timeout,
                               TALLOC_CTX *mem_ctx,
                               void (*callback)(struct ctdb_context *, int, void *),
                               void *private_data,
-                              const char *fmt, ...) PRINTF_ATTRIBUTE(5,6);
+                              const char *fmt, ...) PRINTF_ATTRIBUTE(6,7);
 void ctdb_release_all_ips(struct ctdb_context *ctdb);
 
 void set_nonblocking(int fd);
index 6f6b46f03d209ff6a40c24c205559cd782296703..2a539b1f97ff76b622e8f2a566be9878c05960ab 100644 (file)
@@ -108,6 +108,8 @@ static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
        char *ip = inet_ntoa(state->sin->sin_addr);
        struct ctdb_tcp_list *tcp;
 
+       ctdb_start_monitoring(ctdb);
+
        if (status != 0) {
                DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n",
                         ip, ctdb->takeover.interface));
@@ -186,7 +188,11 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
                 ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits, 
                 ctdb->takeover.interface));
 
-       ret = ctdb_event_script_callback(ctdb, state, takeover_ip_callback, state,
+       ctdb_stop_monitoring(ctdb);
+
+       ret = ctdb_event_script_callback(ctdb, 
+                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+                                        state, takeover_ip_callback, state,
                                         "takeip %s %s %u",
                                         ctdb->takeover.interface, 
                                         ip,
@@ -217,6 +223,8 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status,
        TDB_DATA data;
        struct ctdb_tcp_list *tcp;
 
+       ctdb_start_monitoring(ctdb);
+
        /* send a message to all clients of this node telling them
           that the cluster has been reconfigured and they should
           release any sockets on this IP */
@@ -286,7 +294,11 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
        CTDB_NO_MEMORY(ctdb, state->sin);
        *state->sin = pip->sin;
 
-       ret = ctdb_event_script_callback(ctdb, state, release_ip_callback, state,
+       ctdb_stop_monitoring(ctdb);
+
+       ret = ctdb_event_script_callback(ctdb, 
+                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+                                        state, release_ip_callback, state,
                                         "releaseip %s %s %u",
                                         ctdb->takeover.interface, 
                                         ip,
index 5d5bd7223f4bb998ee4860a30708a65f802503e9..b67362c9a5150bf05f8cbdf818e58ad272248bbb 100644 (file)
@@ -337,6 +337,21 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
        callback(ctdb, status, private_data);
 }
 
+
+/* called when child times out */
+static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, 
+                                     struct timeval t, void *p)
+{
+       struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
+       void (*callback)(struct ctdb_context *, int, void *) = state->callback;
+       void *private_data = state->private_data;
+       struct ctdb_context *ctdb = state->ctdb;
+
+       DEBUG(0,("event script timed out\n"));
+       talloc_free(state);
+       callback(ctdb, -1, private_data);
+}
+
 /*
   destroy a running event script
  */
@@ -352,6 +367,7 @@ static int event_script_destructor(struct ctdb_event_script_state *state)
   finished
  */
 int ctdb_event_script_callback(struct ctdb_context *ctdb, 
+                              struct timeval timeout,
                               TALLOC_CTX *mem_ctx,
                               void (*callback)(struct ctdb_context *, int, void *),
                               void *private_data,
@@ -400,5 +416,9 @@ int ctdb_event_script_callback(struct ctdb_context *ctdb,
        event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
                     ctdb_event_script_handler, state);
 
+       if (!timeval_is_zero(&timeout)) {
+               event_add_timed(ctdb->ev, state, timeout, ctdb_event_script_timeout, state);
+       }
+
        return 0;
 }