From: Christian Brauner Date: Sat, 13 Apr 2019 14:41:30 +0000 (+0200) Subject: start: handle setting pdeath signal in new pidns X-Git-Tag: lxc-4.0.0~113^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4d8bdfa0304fcf5a0355aa5ee4381c5c21a1bab1;p=thirdparty%2Flxc.git start: handle setting pdeath signal in new pidns In the usual case the child runs in a separate pid namespace. So far we haven't been able to reliably set the pdeath signal. When we set the pdeath signal we need to verify that we haven't lost a race whereby we have been orphaned and though we have set a pdeath signal it won't help us since, well, the parent is dead. We were able to correctly handle this case when we were in the same pidns since getppid() will return a valid pid. When we are in a separate pidns 0 will be returned since the parent doesn't exist in our pidns. A while back, while Jann and I were discussing other things he came up with a nifty idea: simply pass an fd for the parent's status file and check the "State:" field. This is the implementation of that idea. Suggested-by: Jann Horn Signed-off-by: Christian Brauner --- diff --git a/src/lxc/start.c b/src/lxc/start.c index 32267b773..a9a07bc83 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -709,6 +709,9 @@ void lxc_free_handler(struct lxc_handler *handler) if (handler->conf->maincmd_fd >= 0) lxc_abstract_unix_close(handler->conf->maincmd_fd); + if (handler->monitor_status_fd >= 0) + close(handler->monitor_status_fd); + if (handler->state_socket_pair[0] >= 0) close(handler->state_socket_pair[0]); @@ -743,6 +746,7 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf, handler->data_sock[0] = handler->data_sock[1] = -1; handler->conf = conf; handler->lxcpath = lxcpath; + handler->monitor_status_fd = -EBADF; handler->pinfd = -1; handler->pidfd = -EBADF; handler->proc_pidfd = -EBADF; @@ -795,11 +799,17 @@ on_error: int lxc_init(const char *name, struct lxc_handler *handler) { + __do_close_prot_errno int status_fd = -EBADF; int ret; const char *loglevel; struct lxc_conf *conf = handler->conf; handler->monitor_pid = lxc_raw_getpid(); + status_fd = open("/proc/self/status", O_RDONLY | O_CLOEXEC); + if (status_fd < 0) { + SYSERROR("Failed to open monitor status fd"); + goto out_close_maincmd_fd; + } lsm_init(); TRACE("Initialized LSM"); @@ -930,6 +940,7 @@ int lxc_init(const char *name, struct lxc_handler *handler) TRACE("Initialized LSM"); INFO("Container \"%s\" is initialized", name); + handler->monitor_status_fd = move_fd(status_fd); return 0; out_destroy_cgroups: @@ -1131,6 +1142,7 @@ static int do_start(void *data) struct lxc_handler *handler = data; ATTR_UNUSED __do_close_prot_errno int data_sock0 = handler->data_sock[0], data_sock1 = handler->data_sock[1]; + __do_close_prot_errno int status_fd = -EBADF; int ret; uid_t new_uid; gid_t new_gid; @@ -1141,13 +1153,18 @@ static int do_start(void *data) lxc_sync_fini_parent(handler); + if (lxc_abstract_unix_recv_fds(handler->data_sock[1], &status_fd, 1, NULL, 0) < 0) { + ERROR("Failed to receive status file descriptor to child process"); + goto out_warn_father; + } + /* This prctl must be before the synchro, so if the parent dies before * we set the parent death signal, we will detect its death with the * synchro right after, otherwise we have a window where the parent can * exit before we set the pdeath signal leading to a unsupervized * container. */ - ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid); + ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid, status_fd); if (ret < 0) { SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL"); goto out_warn_father; @@ -1227,7 +1244,7 @@ static int do_start(void *data) goto out_warn_father; /* set{g,u}id() clears deathsignal */ - ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid); + ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid, status_fd); if (ret < 0) { SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL"); goto out_warn_father; @@ -1481,7 +1498,8 @@ static int do_start(void *data) } if (handler->conf->monitor_signal_pdeath != SIGKILL) { - ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath, handler->monitor_pid); + ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath, + handler->monitor_pid, status_fd); if (ret < 0) { SYSERROR("Failed to set PR_SET_PDEATHSIG to %d", handler->conf->monitor_signal_pdeath); @@ -1787,6 +1805,12 @@ static int lxc_spawn(struct lxc_handler *handler) lxc_sync_fini_child(handler); + if (lxc_abstract_unix_send_fds(handler->data_sock[0], &handler->monitor_status_fd, 1, NULL, 0) < 0) { + ERROR("Failed to send status file descriptor to child process"); + goto out_delete_net; + } + close_prot_errno_disarm(handler->monitor_status_fd); + /* Map the container uids. The container became an invalid userid the * moment it was cloned with CLONE_NEWUSER. This call doesn't change * anything immediately, but allows the container to setuid(0) (0 being diff --git a/src/lxc/start.h b/src/lxc/start.h index a3a5b4d54..1f81daf8e 100644 --- a/src/lxc/start.h +++ b/src/lxc/start.h @@ -114,6 +114,8 @@ struct lxc_handler { /* The monitor's pid. */ pid_t monitor_pid; + int monitor_status_fd; + /* Whether the child has already exited. */ bool init_died; diff --git a/src/lxc/utils.c b/src/lxc/utils.c index 18f200568..8156f5b8c 100644 --- a/src/lxc/utils.c +++ b/src/lxc/utils.c @@ -1728,7 +1728,43 @@ uint64_t lxc_find_next_power2(uint64_t n) return n; } -int lxc_set_death_signal(int signal, pid_t parent) +static int process_dead(/* takes */ int status_fd) +{ + __do_close_prot_errno int dupfd = -EBADF; + __do_free char *line = NULL; + __do_fclose FILE *f = NULL; + int ret = 0; + size_t n = 0; + + dupfd = dup(status_fd); + if (dupfd < 0) + return -1; + + if (fd_cloexec(dupfd, true) < 0) + return -1; + + /* transfer ownership of fd */ + f = fdopen(move_fd(dupfd), "re"); + if (!f) + return -1; + + ret = 0; + while (getline(&line, &n, f) != -1) { + char *state; + + if (strncmp(line, "State:", 6)) + continue; + + state = lxc_trim_whitespace_in_place(line + 6); + /* only check whether process is dead or zombie for now */ + if (*state == 'X' || *state == 'Z') + ret = 1; + } + + return ret; +} + +int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd) { int ret; pid_t ppid; @@ -1736,12 +1772,16 @@ int lxc_set_death_signal(int signal, pid_t parent) ret = prctl(PR_SET_PDEATHSIG, prctl_arg(signal), prctl_arg(0), prctl_arg(0), prctl_arg(0)); - /* If not in a PID namespace, check whether we have been orphaned. */ + /* verify that we haven't been orphaned in the meantime */ ppid = (pid_t)syscall(SYS_getppid); - if (ppid && ppid != parent) { - ret = raise(SIGKILL); - if (ret < 0) - return -1; + if (ppid == 0) { /* parent outside our pidns */ + if (parent_status_fd < 0) + return 0; + + if (process_dead(parent_status_fd) == 1) + return raise(SIGKILL); + } else if (ppid != parent) { + return raise(SIGKILL); } if (ret < 0) diff --git a/src/lxc/utils.h b/src/lxc/utils.h index c1667e8c4..97fa54536 100644 --- a/src/lxc/utils.h +++ b/src/lxc/utils.h @@ -256,7 +256,7 @@ static inline uint64_t lxc_getpagesize(void) extern uint64_t lxc_find_next_power2(uint64_t n); /* Set a signal the child process will receive after the parent has died. */ -extern int lxc_set_death_signal(int signal, pid_t parent); +extern int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd); extern int fd_cloexec(int fd, bool cloexec); extern int recursive_destroy(char *dirname); extern int lxc_setup_keyring(void);