git.ipfire.org Git - thirdparty/samba.git/commitdiff
make the running of the takeover and release event scripts async, to prevent outages...
author: Andrew Tridgell <tridge@samba.org>
Fri, 1 Jun 2007 09:05:41 +0000 (19:05 +1000)
committer: Andrew Tridgell <tridge@samba.org>
Fri, 1 Jun 2007 09:05:41 +0000 (19:05 +1000)
(This used to be ctdb commit 4189be97eee7ab2a50335c860f2fcd9566667d01)

ctdb/common/ctdb.c
ctdb/common/ctdb_control.c
ctdb/common/ctdb_daemon.c
ctdb/include/ctdb_private.h
ctdb/takeover/ctdb_takeover.c
ctdb/takeover/system.c

index d957e372f98ad8ea36340f8fa603b8f61d318987..273d40236c78dee930ed078ae9c9dbd1dd2a28d0 100644 (file)
@@ -41,7 +41,7 @@ int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport)
 int ctdb_set_logfile(struct ctdb_context *ctdb, const char *logfile)
 {
        ctdb->logfile = talloc_strdup(ctdb, logfile);
-       if (ctdb->logfile != NULL) {
+       if (ctdb->logfile != NULL && strcmp(logfile, "-") != 0) {
                int fd;
                close(1);
                close(2);
index ac677ac1c329728b3b283433616804b0f782fba1..319adfc6e1f868ce119de94d6c978015a7a07d12 100644 (file)
@@ -265,11 +265,11 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 
        case CTDB_CONTROL_TAKEOVER_IP:
                CHECK_CONTROL_DATA_SIZE(sizeof(struct sockaddr));
-               return ctdb_control_takeover_ip(ctdb, indata);
+               return ctdb_control_takeover_ip(ctdb, c, indata, async_reply);
 
        case CTDB_CONTROL_RELEASE_IP:
                CHECK_CONTROL_DATA_SIZE(sizeof(struct sockaddr));
-               return ctdb_control_release_ip(ctdb, indata);
+               return ctdb_control_release_ip(ctdb, c, indata, async_reply);
 
        case CTDB_CONTROL_DELETE_LOW_RSN: 
                CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_delete_low_rsn));
index b98981b9dc5078034c4c0dae3d29b417f0c22e08..3309d375e48778b752e8595317c27a4a3df0cc4b 100644 (file)
@@ -32,7 +32,7 @@
 static void daemon_incoming_packet(void *, struct ctdb_req_header *);
 
 /* called when the "startup" event script has finished */
-static void ctdb_start_transport(struct ctdb_context *ctdb, int status)
+static void ctdb_start_transport(struct ctdb_context *ctdb, int status, void *p)
 {
        if (status != 0) {
                DEBUG(0,("startup event failed!\n"));
@@ -87,7 +87,8 @@ static void ctdb_main_loop(struct ctdb_context *ctdb)
                                 CTDB_CTRL_FLAG_NOREPLY,
                                 tdb_null, NULL, NULL);
 
-       ret = ctdb_event_script_callback(ctdb, ctdb_start_transport, "startup");
+       ret = ctdb_event_script_callback(ctdb, ctdb, 
+                                        ctdb_start_transport, NULL, "startup");
        if (ret != 0) {
                DEBUG(0,("Failed startup event script\n"));
                return;
index d3320ac163ec4019c366247feaf784a2ef5e1915..f2087e8e90c08d57676f261b3d468fadcfbe84b3 100644 (file)
@@ -925,10 +925,16 @@ int ctdb_ctrl_set_rsn_nonempty(struct ctdb_context *ctdb, struct timeval timeout
 int ctdb_ctrl_delete_low_rsn(struct ctdb_context *ctdb, struct timeval timeout, 
                             uint32_t destnode, uint32_t db_id, uint64_t rsn);
 void ctdb_set_realtime(void);
-int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, TDB_DATA indata);
+int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
+                                struct ctdb_req_control *c,
+                                TDB_DATA indata, 
+                                bool *async_reply);
 int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout, 
                          uint32_t destnode, const char *ip);
-int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata);
+int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
+                                struct ctdb_req_control *c,
+                                TDB_DATA indata, 
+                                bool *async_reply);
 int ctdb_ctrl_release_ip(struct ctdb_context *ctdb, struct timeval timeout, 
                         uint32_t destnode, const char *ip);
 
@@ -951,8 +957,10 @@ int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn);
 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client);
 int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
 int ctdb_event_script_callback(struct ctdb_context *ctdb, 
-                              void (*callback)(struct ctdb_context *, int),
-                              const char *fmt, ...) PRINTF_ATTRIBUTE(3,4);
+                              TALLOC_CTX *mem_ctx,
+                              void (*callback)(struct ctdb_context *, int, void *),
+                              void *private_data,
+                              const char *fmt, ...) PRINTF_ATTRIBUTE(5,6);
 void ctdb_release_all_ips(struct ctdb_context *ctdb);
 
 void set_nonblocking(int fd);
index d5fcfcee64c8b5c1883e1533facb847c48a19bae..af250f570bc425a2eed46603fe0f988e2d855ecc 100644 (file)
@@ -91,52 +91,48 @@ static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *
                        ctdb_control_send_arp, arp);
 }
 
+struct takeover_callback_state {
+       struct ctdb_req_control *c;
+       struct sockaddr_in *sin;
+};
 
 /*
-  take over an ip address
+  called when takeip event finishes
  */
-int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, TDB_DATA indata)
+static void takeover_ip_callback(struct ctdb_context *ctdb, int status, 
+                                void *private_data)
 {
-       int ret;
-       struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
+       struct takeover_callback_state *state = 
+               talloc_get_type(private_data, struct takeover_callback_state);
        struct ctdb_takeover_arp *arp;
-       char *ip = inet_ntoa(sin->sin_addr);
+       char *ip = inet_ntoa(state->sin->sin_addr);
        struct ctdb_tcp_list *tcp;
 
-       if (ctdb_sys_have_ip(ip)) {
-               return 0;
-       }
-
-       DEBUG(0,("Takover of IP %s/%u on interface %s\n", 
-                ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits, 
-                ctdb->takeover.interface));
-       ret = ctdb_event_script(ctdb, "takeip %s %s %u",
-                               ctdb->takeover.interface, 
-                               ip,
-                               ctdb->nodes[ctdb->vnn]->public_netmask_bits);
-       if (ret != 0) {
+       if (status != 0) {
                DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n",
                         ip, ctdb->takeover.interface));
-               return -1;
+               ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
+               talloc_free(state);
+               return;
        }
 
        if (!ctdb->takeover.last_ctx) {
                ctdb->takeover.last_ctx = talloc_new(ctdb);
-               CTDB_NO_MEMORY(ctdb, ctdb->takeover.last_ctx);
+               if (!ctdb->takeover.last_ctx) goto failed;
        }
 
        arp = talloc_zero(ctdb->takeover.last_ctx, struct ctdb_takeover_arp);
-       CTDB_NO_MEMORY(ctdb, arp);
+       if (!arp) goto failed;
        
        arp->ctdb = ctdb;
-       arp->sin = *sin;
+       arp->sin = *state->sin;
 
        /* add all of the known tcp connections for this IP to the
           list of tcp connections to send tickle acks for */
        for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) {
-               if (sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
+               if (state->sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
                        struct ctdb_tcp_list *t2 = talloc(arp, struct ctdb_tcp_list);
-                       CTDB_NO_MEMORY(ctdb, t2);
+                       if (t2 == NULL) goto failed;
                        *t2 = *tcp;
                        DLIST_ADD(arp->tcp_list, t2);
                }
@@ -145,42 +141,78 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, TDB_DATA indata)
        event_add_timed(arp->ctdb->ev, arp->ctdb->takeover.last_ctx, 
                        timeval_zero(), ctdb_control_send_arp, arp);
 
-       return ret;
+       /* the control succeeded */
+       ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
+       talloc_free(state);
+       return;
+
+failed:
+       ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
+       talloc_free(state);
+       return;
 }
 
 /*
-  release an ip address
+  take over an ip address
  */
-int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata)
+int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
+                                struct ctdb_req_control *c,
+                                TDB_DATA indata, 
+                                bool *async_reply)
 {
+       int ret;
+       struct takeover_callback_state *state;
        struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
-       TDB_DATA data;
        char *ip = inet_ntoa(sin->sin_addr);
-       int ret;
-       struct ctdb_tcp_list *tcp;
 
-       if (!ctdb_sys_have_ip(ip)) {
+       /* if our kernel already has this IP, do nothing */
+       if (ctdb_sys_have_ip(ip)) {
                return 0;
        }
 
-       DEBUG(0,("Release of IP %s/%u on interface %s\n", 
+       state = talloc(ctdb, struct takeover_callback_state);
+       CTDB_NO_MEMORY(ctdb, state);
+
+       state->c = talloc_steal(ctdb, c);
+       state->sin = talloc(ctdb, struct sockaddr_in);       
+       CTDB_NO_MEMORY(ctdb, state->sin);
+       *state->sin = *(struct sockaddr_in *)indata.dptr;       
+
+       DEBUG(0,("Takover of IP %s/%u on interface %s\n", 
                 ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits, 
                 ctdb->takeover.interface));
 
-       /* stop any previous arps */
-       talloc_free(ctdb->takeover.last_ctx);
-       ctdb->takeover.last_ctx = NULL;
-
-       ret = ctdb_event_script(ctdb, "releaseip %s %s %u",
-                               ctdb->takeover.interface, 
-                               ip,
-                               ctdb->nodes[ctdb->vnn]->public_netmask_bits);
+       ret = ctdb_event_script_callback(ctdb, state, takeover_ip_callback, state,
+                                        "takeip %s %s %u",
+                                        ctdb->takeover.interface, 
+                                        ip,
+                                        ctdb->nodes[ctdb->vnn]->public_netmask_bits);
        if (ret != 0) {
-               DEBUG(0,(__location__ " Failed to release IP %s on interface %s\n",
+               DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n",
                         ip, ctdb->takeover.interface));
+               talloc_free(state);
                return -1;
        }
 
+       /* tell ctdb_control.c that we will be replying asynchronously */
+       *async_reply = true;
+
+       return 0;
+}
+
+
+/*
+  called when releaseip event finishes
+ */
+static void release_ip_callback(struct ctdb_context *ctdb, int status, 
+                               void *private_data)
+{
+       struct takeover_callback_state *state = 
+               talloc_get_type(private_data, struct takeover_callback_state);
+       char *ip = inet_ntoa(state->sin->sin_addr);
+       TDB_DATA data;
+       struct ctdb_tcp_list *tcp;
+
        /* send a message to all clients of this node telling them
           that the cluster has been reconfigured and they should
           release any sockets on this IP */
@@ -192,7 +224,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata)
        /* tell other nodes about any tcp connections we were holding with this IP */
        for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) {
                if (tcp->vnn == ctdb->vnn && 
-                   sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
+                   state->sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
                        struct ctdb_control_tcp_vnn t;
 
                        t.vnn  = ctdb->vnn;
@@ -208,6 +240,59 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata)
                }
        }
 
+       /* the control succeeded */
+       ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
+       talloc_free(state);
+}
+
+
+/*
+  release an ip address
+ */
+int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
+                               struct ctdb_req_control *c,
+                               TDB_DATA indata, 
+                               bool *async_reply)
+{
+       int ret;
+       struct takeover_callback_state *state;
+       struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
+       char *ip = inet_ntoa(sin->sin_addr);
+
+       if (!ctdb_sys_have_ip(ip)) {
+               return 0;
+       }
+
+       DEBUG(0,("Release of IP %s/%u on interface %s\n", 
+                ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits, 
+                ctdb->takeover.interface));
+
+       /* stop any previous arps */
+       talloc_free(ctdb->takeover.last_ctx);
+       ctdb->takeover.last_ctx = NULL;
+
+       state = talloc(ctdb, struct takeover_callback_state);
+       CTDB_NO_MEMORY(ctdb, state);
+
+       state->c = talloc_steal(state, c);
+       state->sin = talloc(state, struct sockaddr_in);       
+       CTDB_NO_MEMORY(ctdb, state->sin);
+       *state->sin = *(struct sockaddr_in *)indata.dptr;       
+
+       ret = ctdb_event_script_callback(ctdb, state, release_ip_callback, state,
+                                        "releaseip %s %s %u",
+                                        ctdb->takeover.interface, 
+                                        ip,
+                                        ctdb->nodes[ctdb->vnn]->public_netmask_bits);
+       if (ret != 0) {
+               DEBUG(0,(__location__ " Failed to release IP %s on interface %s\n",
+                        ip, ctdb->takeover.interface));
+               talloc_free(state);
+               return -1;
+       }
+
+       /* tell the control that we will be reply asynchronously */
+       *async_reply = true;
 
        return 0;
 }
index 59016e2c372502b76f4c8be967c49c548d1a1c6e..cff122f35b0cbbba774f5fc286a8aefa54fac541 100644 (file)
@@ -312,8 +312,9 @@ int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
 struct ctdb_event_script_state {
        struct ctdb_context *ctdb;
        pid_t child;
-       void (*callback)(struct ctdb_context *, int);
+       void (*callback)(struct ctdb_context *, int, void *);
        int fd[2];
+       void *private_data;
 };
 
 /* called when child is finished */
@@ -327,28 +328,41 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
        if (status != -1) {
                status = WEXITSTATUS(status);
        }
-       state->callback(state->ctdb, status);
+       state->callback(state->ctdb, status, state->private_data);
+       talloc_set_destructor(state, NULL);
        talloc_free(state);
 }
 
+/*
+  destroy a running event script
+ */
+static int event_script_destructor(struct ctdb_event_script_state *state)
+{
+       kill(state->child, SIGKILL);
+       waitpid(state->child, NULL, 0);
+       return 0;
+}
 
 /*
   run the event script in the background, calling the callback when 
   finished
  */
 int ctdb_event_script_callback(struct ctdb_context *ctdb, 
-                              void (*callback)(struct ctdb_context *, int),
+                              TALLOC_CTX *mem_ctx,
+                              void (*callback)(struct ctdb_context *, int, void *),
+                              void *private_data,
                               const char *fmt, ...)
 {
        struct ctdb_event_script_state *state;
        va_list ap;
        int ret;
 
-       state = talloc(ctdb, struct ctdb_event_script_state);
+       state = talloc(mem_ctx, struct ctdb_event_script_state);
        CTDB_NO_MEMORY(ctdb, state);
 
        state->ctdb = ctdb;
        state->callback = callback;
+       state->private_data = private_data;
        
        ret = pipe(state->fd);
        if (ret != 0) {
@@ -373,6 +387,8 @@ int ctdb_event_script_callback(struct ctdb_context *ctdb,
                _exit(ret);
        }
 
+       talloc_set_destructor(state, event_script_destructor);
+
        close(state->fd[1]);
 
        event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,