From: Christian Brauner Date: Thu, 29 Jun 2017 10:16:00 +0000 (+0200) Subject: start: use separate socket on daemonized start X-Git-Tag: lxc-2.1.0~57^2~10 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5e5576a4ef2d6f4a1abce6b8e0ff68edebdde47f;p=thirdparty%2Flxc.git start: use separate socket on daemonized start Since we killed lxc-monitord we rely on the container's command socket to wait for the container. This doesn't work nicely on daemonized startup since a container's init process might be something that is so short-lived that we won't even be able to add a state client before the mainloop closes. But the container might still have been RUNNING and executed the init binary correctly. In this case we would erroneously report that the container failed to start when it actually started just fine. This commit ensures that we really all cases where the container successfully ran by switching to a short-lived per-container anonymous unix socket pair that uses credentials to pass container states around. It is immediately closed once the container has started successfully. This should also make daemonized container start way more robust since we don't rely on the command socket handler to be running. For the experienced developer: Yes, I did think about utilizing the command socket directly for this. The problem is that when the mainloop starts it may end up end accept()ing the connection that we want do_wait_on_daemonized_start() to accept() so this won't work and might cause us to hang indefinitely. The same problem arises when the container fails to start before the mainloop is created. In this case we would hang indefinitely as well. Signed-off-by: Christian Brauner --- diff --git a/src/lxc/criu.c b/src/lxc/criu.c index d9fe2a60d..b1ab5d46e 100644 --- a/src/lxc/criu.c +++ b/src/lxc/criu.c @@ -797,7 +797,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ close(fd); } - handler = lxc_init_handler(c->name, c->lxc_conf, c->config_path); + handler = lxc_init_handler(c->name, c->lxc_conf, c->config_path, false); if (!handler) goto out; diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c index 832c15968..cc35990ea 100644 --- a/src/lxc/lxccontainer.c +++ b/src/lxc/lxccontainer.c @@ -38,6 +38,7 @@ #include #include +#include "af_unix.h" #include "attach.h" #include "bdev.h" #include "lxcoverlay.h" @@ -611,23 +612,6 @@ static bool do_lxcapi_wait(struct lxc_container *c, const char *state, int timeo WRAP_API_2(bool, lxcapi_wait, const char *, int) -static bool do_wait_on_daemonized_start(struct lxc_container *c, int pid) -{ - /* we'll probably want to make this timeout configurable? */ - int timeout = 5, ret, status; - - /* - * our child is going to fork again, then exit. reap the - * child - */ - ret = waitpid(pid, &status, 0); - if (ret == -1 || !WIFEXITED(status) || WEXITSTATUS(status) != 0) - DEBUG("failed waiting for first dual-fork child"); - return do_lxcapi_wait(c, "RUNNING", timeout); -} - -WRAP_API_1(bool, wait_on_daemonized_start, int) - static bool am_single_threaded(void) { struct dirent *direntp; @@ -714,6 +698,77 @@ static void free_init_cmd(char **argv) free(argv); } +static int lxc_rcv_status(int state_socket) +{ + int ret; + lxc_state_t state = -1; + struct timeval timeout = {0}; + + /* Set 5 second timeout to prevent hanging forever in case something + * goes wrong. 5 seconds is a long time to get into RUNNING state. + */ + timeout.tv_sec = 5; + ret = setsockopt(state_socket, SOL_SOCKET, SO_RCVTIMEO, + (const void *)&timeout, sizeof(timeout)); + if (ret < 0) { + SYSERROR("Failed to set 5s timeout on containter state socket"); + return -1; + } + + /* Receive container state. */ + ret = lxc_abstract_unix_rcv_credential(state_socket, &state, + sizeof(lxc_state_t)); + /* Close container state client. */ + close(state_socket); + + if (ret <= 0) + return -1; + + return state; +} + +static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid) +{ + int state, status; + + /* Close write end of the socket pair. */ + close(handler->state_socket_pair[1]); + handler->state_socket_pair[1] = -1; + + state = lxc_rcv_status(handler->state_socket_pair[0]); + + /* Close read end of the socket pair. */ + close(handler->state_socket_pair[0]); + handler->state_socket_pair[0] = -1; + + /* The first child is going to fork() again and then exits. So we reap + * the first child here. + */ + if (waitpid(pid, &status, 0) < 0) + DEBUG("Failed waiting on first child"); + else if (!WIFEXITED(status)) + DEBUG("Failed to retrieve exit status of first child"); + else if (WEXITSTATUS(status) != 0) + DEBUG("First child exited with: %d", WEXITSTATUS(status)); + + if (state < 0) { + SYSERROR("Failed to receive the container state"); + return false; + } + + /* If we receive anything else then running we know that the container + * failed to start. + */ + if (state != RUNNING) { + ERROR("Received container state \"%s\" instead of \"RUNNING\"", + lxc_state2str(state)); + return false; + } + + TRACE("Container is in \"RUNNING\" state"); + return true; +} + static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const argv[]) { int ret; @@ -761,7 +816,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a daemonize = c->daemonize; /* initialize handler */ - handler = lxc_init_handler(c->name, conf, c->config_path); + handler = lxc_init_handler(c->name, conf, c->config_path, daemonize); container_mem_unlock(c); if (!handler) return false; @@ -805,8 +860,12 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a * the PID file, child will do the free and unlink. */ c->pidfile = NULL; + + /* Prevent leaking the command socket to the second + * fork(). + */ close(handler->conf->maincmd_fd); - return wait_on_daemonized_start(c, pid); + return wait_on_daemonized_start(handler, pid); } /* We don't really care if this doesn't print all the @@ -830,7 +889,11 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a SYSERROR("Error chdir()ing to /."); exit(1); } - lxc_check_inherited(conf, true, &handler->conf->maincmd_fd, 1); + lxc_check_inherited(conf, true, + (int[]){handler->conf->maincmd_fd, + handler->state_socket_pair[0], + handler->state_socket_pair[1]}, + 3); if (null_stdfds() < 0) { ERROR("failed to close fds"); exit(1); @@ -895,12 +958,16 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a reboot: if (conf->reboot == 2) { /* initialize handler */ - handler = lxc_init_handler(c->name, conf, c->config_path); + handler = lxc_init_handler(c->name, conf, c->config_path, daemonize); if (!handler) goto out; } - if (lxc_check_inherited(conf, daemonize, &handler->conf->maincmd_fd, 1)) { + if (lxc_check_inherited(conf, daemonize, + (int[]){handler->conf->maincmd_fd, + handler->state_socket_pair[0], + handler->state_socket_pair[1]}, + 3)) { ERROR("Inherited fds found"); lxc_free_handler(handler); ret = 1; diff --git a/src/lxc/start.c b/src/lxc/start.c index a9dbd78a5..ad8c9f86d 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -342,8 +342,9 @@ static int signal_handler(int fd, uint32_t events, void *data, return 1; } -int lxc_set_state(const char *name, struct lxc_handler *handler, - lxc_state_t state) +static int lxc_serve_state_clients(const char *name, + struct lxc_handler *handler, + lxc_state_t state) { ssize_t ret; struct lxc_list *cur, *next; @@ -351,6 +352,7 @@ int lxc_set_state(const char *name, struct lxc_handler *handler, struct lxc_msg msg = {.type = lxc_msg_state, .value = state}; process_lock(); + /* Only set state under process lock held so that we don't cause * lxc_cmd_state_server() to miss a state. */ @@ -396,6 +398,55 @@ int lxc_set_state(const char *name, struct lxc_handler *handler, } process_unlock(); + return 0; +} + +static int lxc_serve_state_socket_pair(const char *name, + struct lxc_handler *handler, + lxc_state_t state) +{ + ssize_t ret; + + if (!handler->backgrounded || + handler->state_socket_pair[1] < 0 || + state == STARTING) + return 0; + + /* Close read end of the socket pair. */ + close(handler->state_socket_pair[0]); + handler->state_socket_pair[0] = -1; + + ret = lxc_abstract_unix_send_credential(handler->state_socket_pair[1], + &(int){state}, sizeof(int)); + if (ret != sizeof(int)) + SYSERROR("Failed to send state to %d", + handler->state_socket_pair[1]); + + TRACE("Sent container state \"%s\" to %d", lxc_state2str(state), + handler->state_socket_pair[1]); + + /* Close write end of the socket pair. */ + close(handler->state_socket_pair[1]); + handler->state_socket_pair[1] = -1; + + return 0; +} + +int lxc_set_state(const char *name, struct lxc_handler *handler, + lxc_state_t state) +{ + int ret; + + ret = lxc_serve_state_socket_pair(name, handler, state); + if (ret < 0) { + ERROR("Failed to synchronize via anonymous pair of unix sockets"); + return -1; + } + + ret = lxc_serve_state_clients(name, handler, state); + if (ret < 0) + return -1; + /* This function will try to connect to the legacy lxc-monitord state * server and only exists for backwards compatibility. */ @@ -458,6 +509,12 @@ void lxc_free_handler(struct lxc_handler *handler) if (handler->conf && handler->conf->maincmd_fd) close(handler->conf->maincmd_fd); + if (handler->state_socket_pair[0] >= 0) + close(handler->state_socket_pair[0]); + + if (handler->state_socket_pair[1] >= 0) + close(handler->state_socket_pair[1]); + if (handler->name) free(handler->name); @@ -466,9 +523,9 @@ void lxc_free_handler(struct lxc_handler *handler) } struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf, - const char *lxcpath) + const char *lxcpath, bool daemonize) { - int i; + int i, ret; struct lxc_handler *handler; handler = malloc(sizeof(*handler)); @@ -483,6 +540,7 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf, handler->conf = conf; handler->lxcpath = lxcpath; handler->pinfd = -1; + handler->state_socket_pair[0] = handler->state_socket_pair[1] = -1; lxc_list_init(&handler->state_clients); for (i = 0; i < LXC_NS_MAX; i++) @@ -494,6 +552,22 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf, goto on_error; } + if (daemonize && !handler->conf->reboot) { + /* Create socketpair() to synchronize on daemonized startup. + * When the container reboots we don't need to synchronize again + * currently so don't open another socketpair(). + */ + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, + handler->state_socket_pair); + if (ret < 0) { + ERROR("Failed to create anonymous pair of unix sockets"); + goto on_error; + } + TRACE("Created anonymous pair {%d,%d} of unix sockets", + handler->state_socket_pair[0], + handler->state_socket_pair[1]); + } + if (lxc_cmd_init(name, handler, lxcpath)) { ERROR("failed to set up command socket"); goto on_error; diff --git a/src/lxc/start.h b/src/lxc/start.h index 58bfbb25c..d8d06cfbf 100644 --- a/src/lxc/start.h +++ b/src/lxc/start.h @@ -25,6 +25,8 @@ #include #include +#include +#include #include #include "conf.h" @@ -50,6 +52,10 @@ struct lxc_handler { bool backgrounded; // indicates whether should we close std{in,out,err} on start int nsfd[LXC_NS_MAX]; int netnsfd; + /* The socketpair() fds used to wait on successful daemonized + * startup. + */ + int state_socket_pair[2]; struct lxc_list state_clients; }; @@ -68,7 +74,8 @@ extern int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_stat extern void lxc_abort(const char *name, struct lxc_handler *handler); extern struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf, - const char *lxcpath); + const char *lxcpath, + bool daemonize); extern void lxc_free_handler(struct lxc_handler *handler); extern int lxc_init(const char *name, struct lxc_handler *handler); extern void lxc_fini(const char *name, struct lxc_handler *handler);