From dbc9832d217d812f68184cc431bfc5ac559e5dfa Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 11 Jun 2017 01:29:45 +0200 Subject: [PATCH] commands: add lxc_cmd_state_server() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit A LXC container's lifecycle is regulated by the states STARTING, RUNNING, STOPPING, STOPPED, ABORTING. These states are tracked in the LXC handler and can be checked via approriate functions in the command socket callback system. (The freezer stages are not part of a container's lifecycle since they are not recorded in the LXC handler. This might change in the future but given that the freezer controller will be removed from future cgroup implementations it is unlikely.) So far, LXC was using an external helper to track the states of a container (lxc-monitord). This solution was error prone. For example, the external state server would hang in various scenarios that seemed to be caused by either very subtle internal races or irritation of the external state server by signals. LXC will switch from an external state monitor (lxc-monitord) which serves as a state server for state clients to a native implementation using the indiviual container's command socket. This solution was discussed and outlined by Stéphane Graber and Christian Brauner during a LX{C,D} sprint. The LXC handler will gain an additional field to track state clients. In order for a state client to receive state notifications from the command server he will need to register himself via the lxc_cmd_state_server() function in the state client list. The state client list will be served by lxc_set_state() during the container's lifecycle. lxc_set_state() will also take care of removing any clients from the state list in the LXC handler once the requested state has been reached and sent to the client. In order to prevent races between adding and serving new state clients the state client list and the state field in the LXC handler will be protected by a lock. This commit effectively deprecates lxc-monitord. Instead of serving states to state clients via the lxc-monitord fifo and socket we will now send the state of the container via the container's command socket. lxc-monitord is still useable and will - for the sake of the lxc-monitor command - be kept around so that non-API state clients can still monitor the container during it's lifecycle. Signed-off-by: Christian Brauner --- src/lxc/commands.c | 151 ++++++++++++++++++++++++++++++++++++++++- src/lxc/commands.h | 5 +- src/lxc/lxccontainer.c | 4 +- src/lxc/start.c | 85 ++++++++++++++++++++++- src/lxc/start.h | 20 +++--- src/lxc/state.c | 85 +++-------------------- src/lxc/state.h | 11 ++- 7 files changed, 264 insertions(+), 97 deletions(-) diff --git a/src/lxc/commands.c b/src/lxc/commands.c index d9f955fc0..baa98cd48 100644 --- a/src/lxc/commands.c +++ b/src/lxc/commands.c @@ -43,7 +43,9 @@ #include "commands.h" #include "console.h" #include "confile.h" +#include "lxclock.h" #include "mainloop.h" +#include "monitor.h" #include "af_unix.h" #include "config.h" @@ -142,6 +144,7 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd) [LXC_CMD_GET_CONFIG_ITEM] = "get_config_item", [LXC_CMD_GET_NAME] = "get_name", [LXC_CMD_GET_LXCPATH] = "get_lxcpath", + [LXC_CMD_STATE_SERVER] = "state_server", }; if (cmd >= LXC_CMD_MAX) @@ -283,7 +286,11 @@ static int lxc_cmd(const char *name, struct lxc_cmd_rr *cmd, int *stopped, char path[sizeof(((struct sockaddr_un *)0)->sun_path)] = { 0 }; char *offset = &path[1]; size_t len; - int stay_connected = cmd->req.cmd == LXC_CMD_CONSOLE; + bool stay_connected = false; + + if (cmd->req.cmd == LXC_CMD_CONSOLE || + cmd->req.cmd == LXC_CMD_STATE_SERVER) + stay_connected = true; *stopped = 0; @@ -581,7 +588,7 @@ out: * * Returns the state on success, < 0 on failure */ -lxc_state_t lxc_cmd_get_state(const char *name, const char *lxcpath) +int lxc_cmd_get_state(const char *name, const char *lxcpath) { int ret, stopped; struct lxc_cmd_rr cmd = { @@ -861,6 +868,145 @@ static int lxc_cmd_get_lxcpath_callback(int fd, struct lxc_cmd_req *req, return lxc_cmd_rsp_send(fd, &rsp); } +/* + * lxc_cmd_state_server: register a client fd in the handler list + * + * @name : name of container to connect to + * @lxcpath : the lxcpath in which the container is running + * + * Returns the lxcpath on success, NULL on failure. + */ +int lxc_cmd_state_server(const char *name, const char *lxcpath, + lxc_state_t states[MAX_STATE]) +{ + int stopped; + ssize_t ret; + int state = -1; + struct lxc_msg msg = {0}; + struct lxc_cmd_rr cmd = { + .req = { + .cmd = LXC_CMD_STATE_SERVER, + .data = states, + .datalen = (sizeof(lxc_state_t) * MAX_STATE) + }, + }; + + /* Lock the whole lxc_cmd_state_server_callback() call to ensure that + * lxc_set_state() doesn't cause us to miss a state. + */ + process_lock(); + /* Check if already in requested state. */ + state = lxc_getstate(name, lxcpath); + if (state < 0) { + process_unlock(); + TRACE("failed to retrieve state of container: %s", + strerror(errno)); + return -1; + } else if (states[state]) { + process_unlock(); + TRACE("container is %s state", lxc_state2str(state)); + return state; + } + + if ((state == STARTING) && !states[RUNNING] && !states[STOPPING] && !states[STOPPED]) { + process_unlock(); + TRACE("container is in %s state and caller requested to be " + "informed about a previous state", + lxc_state2str(state)); + return state; + } else if ((state == RUNNING) && !states[STOPPING] && !states[STOPPED]) { + process_unlock(); + TRACE("container is in %s state and caller requested to be " + "informed about a previous state", + lxc_state2str(state)); + return state; + } else if ((state == STOPPING) && !states[STOPPED]) { + process_unlock(); + TRACE("container is in %s state and caller requested to be " + "informed about a previous state", + lxc_state2str(state)); + return state; + } else if ((state == STOPPED) || (state == ABORTING)) { + process_unlock(); + TRACE("container is in %s state and caller requested to be " + "informed about a previous state", + lxc_state2str(state)); + return state; + } + + ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); + process_unlock(); + if (ret < 0) { + ERROR("failed to execute command: %s", strerror(errno)); + return -1; + } + /* We should now be guaranteed to get an answer from the state sending + * function. + */ + + if (cmd.rsp.ret < 0) { + ERROR("failed to receive socket fd"); + return -1; + } + +again: + ret = recv(cmd.rsp.ret, &msg, sizeof(msg), 0); + if (ret < 0) { + if (errno == EINTR) + goto again; + + ERROR("failed to receive message: %s", strerror(errno)); + return -1; + } + if (ret == 0) { + ERROR("length of message was 0"); + return -1; + } + + TRACE("received state %s from state client %d", + lxc_state2str(msg.value), cmd.rsp.ret); + return msg.value; +} + +static int lxc_cmd_state_server_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler) +{ + struct lxc_cmd_rsp rsp = {0}; + struct state_client *newclient; + struct lxc_list *tmplist; + + if (req->datalen < 0) { + TRACE("requested datalen was < 0"); + return -1; + } + + if (!req->data) { + TRACE("no states requested"); + return -1; + } + + newclient = malloc(sizeof(*newclient)); + if (!newclient) + return -1; + + /* copy requested states */ + memcpy(newclient->states, req->data, sizeof(newclient->states)); + newclient->clientfd = fd; + + tmplist = malloc(sizeof(*tmplist)); + if (!tmplist) { + free(newclient); + return -1; + } + + lxc_list_add_elem(tmplist, newclient); + lxc_list_add_tail(&handler->state_clients, tmplist); + + TRACE("added state client %d to state client list", fd); + + return lxc_cmd_rsp_send(fd, &rsp); +} + static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, struct lxc_handler *handler) { @@ -877,6 +1023,7 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, [LXC_CMD_GET_CONFIG_ITEM] = lxc_cmd_get_config_item_callback, [LXC_CMD_GET_NAME] = lxc_cmd_get_name_callback, [LXC_CMD_GET_LXCPATH] = lxc_cmd_get_lxcpath_callback, + [LXC_CMD_STATE_SERVER] = lxc_cmd_state_server_callback, }; if (req->cmd >= LXC_CMD_MAX) { diff --git a/src/lxc/commands.h b/src/lxc/commands.h index 184eefa09..999c08d7d 100644 --- a/src/lxc/commands.h +++ b/src/lxc/commands.h @@ -43,6 +43,7 @@ typedef enum { LXC_CMD_GET_CONFIG_ITEM, LXC_CMD_GET_NAME, LXC_CMD_GET_LXCPATH, + LXC_CMD_STATE_SERVER, LXC_CMD_MAX, } lxc_cmd_t; @@ -82,8 +83,10 @@ extern char *lxc_cmd_get_config_item(const char *name, const char *item, const c extern char *lxc_cmd_get_name(const char *hashed_sock); extern char *lxc_cmd_get_lxcpath(const char *hashed_sock); extern pid_t lxc_cmd_get_init_pid(const char *name, const char *lxcpath); -extern lxc_state_t lxc_cmd_get_state(const char *name, const char *lxcpath); +extern int lxc_cmd_get_state(const char *name, const char *lxcpath); extern int lxc_cmd_stop(const char *name, const char *lxcpath); +extern int lxc_cmd_state_server(const char *name, const char *lxcpath, + lxc_state_t states[MAX_STATE]); struct lxc_epoll_descr; struct lxc_handler; diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c index 6bea638aa..1bed3833d 100644 --- a/src/lxc/lxccontainer.c +++ b/src/lxc/lxccontainer.c @@ -758,10 +758,10 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a return false; conf = c->lxc_conf; daemonize = c->daemonize; - container_mem_unlock(c); /* initialize handler */ handler = lxc_init_handler(c->name, conf, c->config_path); + container_mem_unlock(c); if (!handler) return false; @@ -800,7 +800,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a * the PID file, child will do the free and unlink. */ c->pidfile = NULL; - close(c->lxc_conf->maincmd_fd); + close(handler->conf->maincmd_fd); return wait_on_daemonized_start(c, pid); } diff --git a/src/lxc/start.c b/src/lxc/start.c index 0d1bdb5cc..0b5c85b1a 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -341,10 +341,64 @@ static int signal_handler(int fd, uint32_t events, void *data, return 1; } -int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t state) +int lxc_set_state(const char *name, struct lxc_handler *handler, + lxc_state_t state) { + ssize_t ret; + struct lxc_list *cur, *next; + struct state_client *client; + struct lxc_msg msg = {.type = lxc_msg_state, .value = state}; + + process_lock(); + /* Only set state under process lock held so that we don't cause + * lxc_cmd_state_server() to miss a state. + */ handler->state = state; + TRACE("set container state to %s", lxc_state2str(state)); + + if (lxc_list_empty(&handler->state_clients)) { + TRACE("no state clients registered"); + process_unlock(); + return 0; + } + + strncpy(msg.name, name, sizeof(msg.name)); + msg.name[sizeof(msg.name) - 1] = 0; + + lxc_list_for_each_safe(cur, &handler->state_clients, next) { + client = cur->elem; + + if (!client->states[state]) { + TRACE("state %s not registered for state client %d", + lxc_state2str(state), client->clientfd); + continue; + } + + TRACE("sending state %s to state client %d", + lxc_state2str(state), client->clientfd); + + again: + ret = send(client->clientfd, &msg, sizeof(msg), 0); + if (ret < 0) { + if (errno == EINTR) + goto again; + + ERROR("failed to send message to client"); + } + + /* kick client from list */ + close(client->clientfd); + lxc_list_del(cur); + free(cur->elem); + free(cur); + } + process_unlock(); + + /* This function will try to connect to the legacy lxc-monitord state + * server and only exists for backwards compatibility. + */ lxc_monitor_send_state(name, state, handler->lxcpath); + return 0; } @@ -415,6 +469,7 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf, handler->conf = conf; handler->lxcpath = lxcpath; handler->pinfd = -1; + lxc_list_init(&handler->state_clients); for (i = 0; i < LXC_NS_MAX; i++) handler->nsfd[i] = -1; @@ -455,12 +510,14 @@ int lxc_init(const char *name, struct lxc_handler *handler) ERROR("Failed loading seccomp policy."); goto out_close_maincmd_fd; } + TRACE("read seccomp policy"); /* Begin by setting the state to STARTING. */ if (lxc_set_state(name, handler, STARTING)) { ERROR("Failed to set state for container \"%s\" to \"%s\".", name, lxc_state2str(STARTING)); goto out_close_maincmd_fd; } + TRACE("set container state to \"STARTING\""); /* Start of environment variable setup for hooks. */ if (name && setenv("LXC_NAME", name, 1)) @@ -485,10 +542,13 @@ int lxc_init(const char *name, struct lxc_handler *handler) SYSERROR("Failed to set environment variable LXC_CGNS_AWARE=1."); /* End of environment variable setup for hooks. */ + TRACE("set environment variables"); + if (run_lxc_hooks(name, "pre-start", conf, handler->lxcpath, NULL)) { ERROR("Failed to run lxc.hook.pre-start for container \"%s\".", name); goto out_aborting; } + TRACE("ran pre-start hooks"); /* The signal fd has to be created before forking otherwise if the child * process exits before we setup the signal fd, the event will be lost @@ -499,19 +559,22 @@ int lxc_init(const char *name, struct lxc_handler *handler) ERROR("Failed to setup SIGCHLD fd handler."); goto out_delete_tty; } + TRACE("set up signal fd"); /* Do this after setting up signals since it might unblock SIGWINCH. */ if (lxc_console_create(conf)) { ERROR("Failed to create console for container \"%s\".", name); goto out_restore_sigmask; } + TRACE("created console"); if (lxc_ttys_shift_ids(conf) < 0) { ERROR("Failed to shift tty into container."); goto out_restore_sigmask; } + TRACE("shifted tty ids"); - INFO("Container \"%s\" is initialized.", name); + INFO("container \"%s\" is initialized", name); return 0; out_restore_sigmask: @@ -529,8 +592,9 @@ out_close_maincmd_fd: void lxc_fini(const char *name, struct lxc_handler *handler) { int i, rc; + struct lxc_list *cur, *next; pid_t self = getpid(); - char *namespaces[LXC_NS_MAX+1]; + char *namespaces[LXC_NS_MAX + 1]; size_t namespace_count = 0; /* The STOPPING state is there for future cleanup code which can take @@ -592,8 +656,23 @@ void lxc_fini(const char *name, struct lxc_handler *handler) lxc_console_delete(&handler->conf->console); lxc_delete_tty(&handler->conf->tty_info); + + /* close the command socket */ close(handler->conf->maincmd_fd); handler->conf->maincmd_fd = -1; + + /* The command socket is now closed, no more state clients can register + * themselves from now on. So free the list of state clients. + */ + lxc_list_for_each_safe(cur, &handler->state_clients, next) { + struct state_client *client = cur->elem; + /* close state client socket */ + close(client->clientfd); + lxc_list_del(cur); + free(cur->elem); + free(cur); + } + free(handler->name); if (handler->ttysock[0] != -1) { close(handler->ttysock[0]); diff --git a/src/lxc/start.h b/src/lxc/start.h index 7b8ebcf92..aa723537d 100644 --- a/src/lxc/start.h +++ b/src/lxc/start.h @@ -32,16 +32,6 @@ #include "state.h" #include "namespace.h" - -struct lxc_handler; - -struct lxc_operations { - int (*start)(struct lxc_handler *, void *); - int (*post_start)(struct lxc_handler *, void *); -}; - -struct cgroup_desc; - struct lxc_handler { pid_t pid; char *name; @@ -60,8 +50,18 @@ struct lxc_handler { bool backgrounded; // indicates whether should we close std{in,out,err} on start int nsfd[LXC_NS_MAX]; int netnsfd; + struct lxc_list state_clients; }; +struct lxc_operations { + int (*start)(struct lxc_handler *, void *); + int (*post_start)(struct lxc_handler *, void *); +}; + +struct state_client { + int clientfd; + lxc_state_t states[MAX_STATE]; +}; extern int lxc_poll(const char *name, struct lxc_handler *handler); extern int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t state); diff --git a/src/lxc/state.c b/src/lxc/state.c index a5f6df8b0..31c6590ab 100644 --- a/src/lxc/state.c +++ b/src/lxc/state.c @@ -79,7 +79,7 @@ lxc_state_t lxc_getstate(const char *name, const char *lxcpath) return state; } -static int fillwaitedstates(const char *strstates, int *states) +static int fillwaitedstates(const char *strstates, lxc_state_t *states) { char *token, *saveptr = NULL; char *strstates_dup = strdup(strstates); @@ -108,90 +108,21 @@ static int fillwaitedstates(const char *strstates, int *states) extern int lxc_wait(const char *lxcname, const char *states, int timeout, const char *lxcpath) { - struct lxc_msg msg; int state; - int s[MAX_STATE] = {0}, fd = -1, ret = -1; + lxc_state_t s[MAX_STATE] = {0}; if (fillwaitedstates(states, s)) return -1; - /* - * if container present, - * then check if already in requested state - */ - state = lxc_getstate(lxcname, lxcpath); + state = lxc_cmd_state_server(lxcname, lxcpath, s); if (state < 0) { - goto out_close; - } else if ((state >= 0) && (s[state])) { - ret = 0; - goto out_close; - } - - if (lxc_monitord_spawn(lxcpath)) + SYSERROR("failed to receive state from monitor"); return -1; + } - fd = lxc_monitor_open(lxcpath); - if (fd < 0) + TRACE("retrieved state of container %s", lxc_state2str(state)); + if (!s[state]) return -1; - for (;;) { - int64_t elapsed_time, curtime = 0; - struct timespec tspec; - int stop = 0; - int retval; - - if (timeout != -1) { - retval = clock_gettime(CLOCK_REALTIME, &tspec); - if (retval) - goto out_close; - curtime = tspec.tv_sec; - } - if (lxc_monitor_read_timeout(fd, &msg, timeout) < 0) { - /* try again if select interrupted by signal */ - if (errno != EINTR) - goto out_close; - } - - if (timeout != -1) { - retval = clock_gettime(CLOCK_REALTIME, &tspec); - if (retval) - goto out_close; - elapsed_time = tspec.tv_sec - curtime; - if (timeout - elapsed_time <= 0) - stop = 1; - timeout -= elapsed_time; - } - - if (strcmp(lxcname, msg.name)) { - if (stop) { - ret = -2; - goto out_close; - } - continue; - } - - switch (msg.type) { - case lxc_msg_state: - if (msg.value < 0 || msg.value >= MAX_STATE) - goto out_close; - - if (s[msg.value]) { - ret = 0; - goto out_close; - } - break; - default: - if (stop) { - ret = -2; - goto out_close; - } - /* just ignore garbage */ - break; - } - } - -out_close: - if (fd >= 0) - lxc_monitor_close(fd); - return ret; + return 0; } diff --git a/src/lxc/state.h b/src/lxc/state.h index 4d26ce6b5..98d15b9ee 100644 --- a/src/lxc/state.h +++ b/src/lxc/state.h @@ -24,8 +24,15 @@ #define __LXC_STATE_H typedef enum { - STOPPED, STARTING, RUNNING, STOPPING, - ABORTING, FREEZING, FROZEN, THAWED, MAX_STATE, + STOPPED, + STARTING, + RUNNING, + STOPPING, + ABORTING, + FREEZING, + FROZEN, + THAWED, + MAX_STATE, } lxc_state_t; extern int lxc_rmstate(const char *name); -- 2.47.3