added timeouts to the event scripts run via ctdb_event_script_callback (the "startup" script is still run without a timeout)

author Andrew Tridgell <tridge@samba.org>

Wed, 6 Jun 2007 03:45:12 +0000 (13:45 +1000)

committer Andrew Tridgell <tridge@samba.org>

Wed, 6 Jun 2007 03:45:12 +0000 (13:45 +1000)
author Andrew Tridgell <tridge@samba.org>
Wed, 6 Jun 2007 03:45:12 +0000 (13:45 +1000)
committer Andrew Tridgell <tridge@samba.org>
Wed, 6 Jun 2007 03:45:12 +0000 (13:45 +1000)
diff --git a/ctdb/common/ctdb_daemon.c b/ctdb/common/ctdb_daemon.c

index 36eebbb9ee3f8d584cb76038dc3e99b7e0c73d1a..95935b8441b2bed29d7a55594bc835f51701e214 100644 (file)
--- a/ctdb/common/ctdb_daemon.c
+++ b/ctdb/common/ctdb_daemon.c
@@ -87,7 +87,7 @@ static void ctdb_main_loop(struct ctdb_context *ctdb)
                                  CTDB_CTRL_FLAG_NOREPLY,
                                  tdb_null, NULL, NULL);
  
-       ret = ctdb_event_script_callback(ctdb, ctdb, 
+       ret = ctdb_event_script_callback(ctdb, timeval_zero(), ctdb, 
                                          ctdb_start_transport, NULL, "startup");
         if (ret != 0) {
                 DEBUG(0,("Failed startup event script\n"));
diff --git a/ctdb/common/ctdb_monitor.c b/ctdb/common/ctdb_monitor.c

index 2723b09d24ae9daa08f0c88d048656f855651c87..de5effb6980b9fe125590ceb72f0e946d108b375 100644 (file)
--- a/ctdb/common/ctdb_monitor.c
+++ b/ctdb/common/ctdb_monitor.c
@@ -29,13 +29,13 @@
    see if any nodes are dead
   */
  static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_event *te, 
-                          struct timeval t, void *private_data)
+                                     struct timeval t, void *private_data)
  {
         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
         int i;
  
         if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED) {
-               event_add_timed(ctdb->ev, ctdb, 
+               event_add_timed(ctdb->ev, ctdb->monitor_context, 
                         timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), 
                         ctdb_check_for_dead_nodes, ctdb);
                 return;
@@ -83,7 +83,7 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve
                 node->tx_cnt = 0;
         }
         
-       event_add_timed(ctdb->ev, ctdb, 
+       event_add_timed(ctdb->ev, ctdb->monitor_context, 
                         timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), 
                         ctdb_check_for_dead_nodes, ctdb);
  }
@@ -100,7 +100,7 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
         TDB_DATA data;
         struct ctdb_node_flag_change c;
  
-       event_add_timed(ctdb->ev, ctdb, 
+       event_add_timed(ctdb->ev, ctdb->monitor_context, 
                         timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
                         ctdb_check_health, ctdb);
  
@@ -136,32 +136,47 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
         int ret;
  
         if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED) {
-               event_add_timed(ctdb->ev, ctdb, 
+               event_add_timed(ctdb->ev, ctdb->monitor_context,
                                 timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
                                 ctdb_check_health, ctdb);
                 return;
         }
         
-       ret = ctdb_event_script_callback(ctdb, ctdb, ctdb_health_callback, ctdb, "monitor");
+       ret = ctdb_event_script_callback(ctdb, 
+                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+                                        ctdb->monitor_context, ctdb_health_callback, ctdb, "monitor");
         if (ret != 0) {
                 DEBUG(0,("Unable to launch monitor event script\n"));
-               event_add_timed(ctdb->ev, ctdb, 
+               event_add_timed(ctdb->ev, ctdb->monitor_context, 
                                 timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
                                 ctdb_check_health, ctdb);
         }       
  }
  
+/* stop any monitoring: free ctdb->monitor_context (the talloc context the monitor timers and the "monitor" event script are attached to) and replace it with a fresh, empty one */
+void ctdb_stop_monitoring(struct ctdb_context *ctdb)
+{
+       talloc_free(ctdb->monitor_context);
+       ctdb->monitor_context = talloc_new(ctdb);
+       CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor_context); /* NOTE(review): presumably fatal when talloc_new() returned NULL - confirm against the macro definition */
+}
  
  /*
    start watching for nodes that might be dead
   */
-int ctdb_start_monitoring(struct ctdb_context *ctdb)
+void ctdb_start_monitoring(struct ctdb_context *ctdb)
  {
-       event_add_timed(ctdb->ev, ctdb, 
-                       timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), 
-                       ctdb_check_for_dead_nodes, ctdb);
-       event_add_timed(ctdb->ev, ctdb, 
-                       timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
-                       ctdb_check_health, ctdb);
-       return 0;
+       struct timed_event *te;
+
+       ctdb_stop_monitoring(ctdb); /* resets ctdb->monitor_context before the two timers below are added under it */
+
+       te = event_add_timed(ctdb->ev, ctdb->monitor_context, /* first timer: dead-node check, due keepalive_interval from now */
+                            timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), 
+                            ctdb_check_for_dead_nodes, ctdb);
+       CTDB_NO_MEMORY_FATAL(ctdb, te); /* a NULL timer is treated as out-of-memory; there is no return value to report failure with */
+
+       te = event_add_timed(ctdb->ev, ctdb->monitor_context, /* second timer: health check, due monitor_interval from now */
+                            timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
+                            ctdb_check_health, ctdb);
+       CTDB_NO_MEMORY_FATAL(ctdb, te);
  }
diff --git a/ctdb/common/ctdb_recover.c b/ctdb/common/ctdb_recover.c

index a27c51263f78a856bb05685e5fc6f6d688df7f1a..a010d2e168545da9b1a85dc92c92b32f5ada405e 100644 (file)
--- a/ctdb/common/ctdb_recover.c
+++ b/ctdb/common/ctdb_recover.c
@@ -404,6 +404,8 @@ static void ctdb_recovered_callback(struct ctdb_context *ctdb, int status, void
  {
         struct ctdb_set_recmode_state *state = talloc_get_type(p, struct ctdb_set_recmode_state);
  
+       ctdb_start_monitoring(ctdb);
+
         if (status == 0) {
                 ctdb->recovery_mode = state->recmode;
         } else {
@@ -453,8 +455,13 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
  
         state->c = talloc_steal(state, c);
         state->recmode = recmode;
+       
+       ctdb_stop_monitoring(ctdb);
+
         /* call the events script to tell all subsystems that we have recovered */
-       ret = ctdb_event_script_callback(ctdb, state, 
+       ret = ctdb_event_script_callback(ctdb, 
+                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+                                        state, 
                                          ctdb_recovered_callback, 
                                          state, "recovered");
         if (ret != 0) {
diff --git a/ctdb/common/ctdb_tunables.c b/ctdb/common/ctdb_tunables.c

index c799ce4ec4c40f2703d8358b54d38114567b01a8..09ba515abe29c86be70aacd4a42adb62c3605696 100644 (file)
--- a/ctdb/common/ctdb_tunables.c
+++ b/ctdb/common/ctdb_tunables.c
@@ -25,18 +25,19 @@ static const struct {
         uint32_t default_v;
         size_t offset;  
  } tunable_map[] = {
-       { "MaxRedirectCount",  3,  offsetof(struct ctdb_tunable, max_redirect_count) },
-       { "SeqnumFrequency",   1,  offsetof(struct ctdb_tunable, seqnum_frequency) },
-       { "ControlTimeout",    60, offsetof(struct ctdb_tunable, control_timeout) },
-       { "TraverseTimeout",   20, offsetof(struct ctdb_tunable, traverse_timeout) },
-       { "KeepaliveInterval", 2,  offsetof(struct ctdb_tunable, keepalive_interval) },
-       { "KeepaliveLimit",    3,  offsetof(struct ctdb_tunable, keepalive_limit) },
-       { "MaxLACount",        7,  offsetof(struct ctdb_tunable, max_lacount) },
-       { "RecoverTimeout",    5,  offsetof(struct ctdb_tunable, recover_timeout) },
-       { "RecoverInterval",   1,  offsetof(struct ctdb_tunable, recover_interval) },
-       { "ElectionTimeout",   3,  offsetof(struct ctdb_tunable, election_timeout) },
-       { "TakeoverTimeout",   5,  offsetof(struct ctdb_tunable, takeover_timeout) },
-       { "MonitorInterval",  15,  offsetof(struct ctdb_tunable, monitor_interval) },
+       { "MaxRedirectCount",    3,  offsetof(struct ctdb_tunable, max_redirect_count) },
+       { "SeqnumFrequency",     1,  offsetof(struct ctdb_tunable, seqnum_frequency) },
+       { "ControlTimeout",     60, offsetof(struct ctdb_tunable, control_timeout) },
+       { "TraverseTimeout",    20, offsetof(struct ctdb_tunable, traverse_timeout) },
+       { "KeepaliveInterval",   2,  offsetof(struct ctdb_tunable, keepalive_interval) },
+       { "KeepaliveLimit",      3,  offsetof(struct ctdb_tunable, keepalive_limit) },
+       { "MaxLACount",          7,  offsetof(struct ctdb_tunable, max_lacount) },
+       { "RecoverTimeout",      5,  offsetof(struct ctdb_tunable, recover_timeout) },
+       { "RecoverInterval",     1,  offsetof(struct ctdb_tunable, recover_interval) },
+       { "ElectionTimeout",     3,  offsetof(struct ctdb_tunable, election_timeout) },
+       { "TakeoverTimeout",     5,  offsetof(struct ctdb_tunable, takeover_timeout) },
+       { "MonitorInterval",    15,  offsetof(struct ctdb_tunable, monitor_interval) },
+       { "EventScriptTimeout", 20,  offsetof(struct ctdb_tunable, script_timeout) },
  };
  
  /*
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h

index 4af74b2cad0cd3949d851fa462bd24003591eefd..0e5e361d70ccbc04299d2dbfab15a97ef14df109 100644 (file)
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -52,6 +52,7 @@ struct ctdb_tunable {
         uint32_t election_timeout;
         uint32_t takeover_timeout;
         uint32_t monitor_interval;
+       uint32_t script_timeout;
  };
  
  /*
@@ -281,6 +282,7 @@ struct ctdb_context {
         struct event_context *ev;
         uint32_t recovery_mode;
         uint32_t monitoring_mode;
+       TALLOC_CTX *monitor_context;
         struct ctdb_tunable tunable;
         enum ctdb_freeze_mode freeze_mode;
         struct ctdb_freeze_handle *freeze_handle;
@@ -917,7 +919,8 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb);
  
  uint32_t ctdb_get_num_enabled_nodes(struct ctdb_context *ctdb);
  
-int ctdb_start_monitoring(struct ctdb_context *ctdb);
+void ctdb_stop_monitoring(struct ctdb_context *ctdb);
+void ctdb_start_monitoring(struct ctdb_context *ctdb);
  void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
  
  void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node);
@@ -983,10 +986,11 @@ int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn);
  void ctdb_takeover_client_destructor_hook(struct ctdb_client *client);
  int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
  int ctdb_event_script_callback(struct ctdb_context *ctdb, 
+                              struct timeval timeout,
                                TALLOC_CTX *mem_ctx,
                                void (*callback)(struct ctdb_context *, int, void *),
                                void *private_data,
-                              const char *fmt, ...) PRINTF_ATTRIBUTE(5,6);
+                              const char *fmt, ...) PRINTF_ATTRIBUTE(6,7);
  void ctdb_release_all_ips(struct ctdb_context *ctdb);
  
  void set_nonblocking(int fd);
diff --git a/ctdb/takeover/ctdb_takeover.c b/ctdb/takeover/ctdb_takeover.c

index 6f6b46f03d209ff6a40c24c205559cd782296703..2a539b1f97ff76b622e8f2a566be9878c05960ab 100644 (file)
--- a/ctdb/takeover/ctdb_takeover.c
+++ b/ctdb/takeover/ctdb_takeover.c
@@ -108,6 +108,8 @@ static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
         char *ip = inet_ntoa(state->sin->sin_addr);
         struct ctdb_tcp_list *tcp;
  
+       ctdb_start_monitoring(ctdb);
+
         if (status != 0) {
                 DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n",
                          ip, ctdb->takeover.interface));
@@ -186,7 +188,11 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
                  ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits, 
                  ctdb->takeover.interface));
  
-       ret = ctdb_event_script_callback(ctdb, state, takeover_ip_callback, state,
+       ctdb_stop_monitoring(ctdb);
+
+       ret = ctdb_event_script_callback(ctdb, 
+                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+                                        state, takeover_ip_callback, state,
                                          "takeip %s %s %u",
                                          ctdb->takeover.interface, 
                                          ip,
@@ -217,6 +223,8 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status,
         TDB_DATA data;
         struct ctdb_tcp_list *tcp;
  
+       ctdb_start_monitoring(ctdb);
+
         /* send a message to all clients of this node telling them
            that the cluster has been reconfigured and they should
            release any sockets on this IP */
@@ -286,7 +294,11 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
         CTDB_NO_MEMORY(ctdb, state->sin);
         *state->sin = pip->sin;
  
-       ret = ctdb_event_script_callback(ctdb, state, release_ip_callback, state,
+       ctdb_stop_monitoring(ctdb);
+
+       ret = ctdb_event_script_callback(ctdb, 
+                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+                                        state, release_ip_callback, state,
                                          "releaseip %s %s %u",
                                          ctdb->takeover.interface, 
                                          ip,
diff --git a/ctdb/takeover/system.c b/ctdb/takeover/system.c

index 5d5bd7223f4bb998ee4860a30708a65f802503e9..b67362c9a5150bf05f8cbdf818e58ad272248bbb 100644 (file)
--- a/ctdb/takeover/system.c
+++ b/ctdb/takeover/system.c
@@ -337,6 +337,21 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
         callback(ctdb, status, private_data);
  }
  
+
+/* timed-event handler, called when the child has not finished before its timeout: logs, frees the script state and reports status -1 to the caller's callback */
+static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te, 
+                                     struct timeval t, void *p)
+{
+       struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
+       void (*callback)(struct ctdb_context *, int, void *) = state->callback; /* copy out everything needed from state: it is freed before the callback runs */
+       void *private_data = state->private_data;
+       struct ctdb_context *ctdb = state->ctdb;
+
+       DEBUG(0,("event script timed out\n"));
+       talloc_free(state); /* NOTE(review): presumably runs event_script_destructor for the still-running script - confirm where the destructor is set */
+       callback(ctdb, -1, private_data); /* -1 is the status handed to the callback for a timed-out script */
+}
+
  /*
    destroy a running event script
   */
@@ -352,6 +367,7 @@ static int event_script_destructor(struct ctdb_event_script_state *state)
    finished
   */
  int ctdb_event_script_callback(struct ctdb_context *ctdb, 
+                              struct timeval timeout,
                                TALLOC_CTX *mem_ctx,
                                void (*callback)(struct ctdb_context *, int, void *),
                                void *private_data,
@@ -400,5 +416,9 @@ int ctdb_event_script_callback(struct ctdb_context *ctdb,
         event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
                      ctdb_event_script_handler, state);
  
+       if (!timeval_is_zero(&timeout)) {
+               event_add_timed(ctdb->ev, state, timeout, ctdb_event_script_timeout, state);
+       }
+
         return 0;
  }
author	Andrew Tridgell <tridge@samba.org>
	Wed, 6 Jun 2007 03:45:12 +0000 (13:45 +1000)
committer	Andrew Tridgell <tridge@samba.org>
	Wed, 6 Jun 2007 03:45:12 +0000 (13:45 +1000)
ctdb/common/ctdb_daemon.c		patch \| blob \| blame \| history
ctdb/common/ctdb_monitor.c		patch \| blob \| blame \| history
ctdb/common/ctdb_recover.c		patch \| blob \| blame \| history
ctdb/common/ctdb_tunables.c		patch \| blob \| blame \| history
ctdb/include/ctdb_private.h		patch \| blob \| blame \| history
ctdb/takeover/ctdb_takeover.c		patch \| blob \| blame \| history
ctdb/takeover/system.c		patch \| blob \| blame \| history