#include "config.h"
#include "apparmor.h"
#include "utils.h"
+#include "commands.h"
+#include "cgroup.h"
+
+#if HAVE_SYS_PERSONALITY_H
+#include <sys/personality.h>
+#endif
lxc_log_define(lxc_attach, lxc);
}
snprintf(path, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns[i]);
- fd[i] = open(path, O_RDONLY);
+ fd[i] = open(path, O_RDONLY | O_CLOEXEC);
if (fd[i] < 0) {
saved_errno = errno;
/* TODO: we should also parse supplementary groups and use
* setgroups() to set them */
}
+
+/* Bundle handed to attach_child_main() through the void* argument of
+ * lxc_clone(); filled in by the intermediate process in lxc_attach().
+ */
+struct attach_clone_payload {
+	/* child's end of the IPC socket pair shared with the initial process */
+	int ipc_socket;
+	/* attach options in effect for this call */
+	lxc_attach_options_t *options;
+	/* context gathered from the container's init process */
+	struct lxc_proc_context_info *init_ctx;
+	/* function the attached process finally runs ... */
+	lxc_attach_exec_t exec_function;
+	/* ... and the opaque argument passed to it */
+	void *exec_payload;
+};
+
+/* entry point of the attached process, defined further down */
+static int attach_child_main(void* data);
+
+/* help the optimizer along if it doesn't know that exit always exits:
+ * the return statement is never reached, it only tells the compiler
+ * that control does not fall off the end of the calling function.
+ * The temporary makes sure (c) is evaluated exactly once; its name
+ * deliberately avoids the double-underscore prefix, which is reserved
+ * for the implementation.
+ */
+#define rexit(c) do { int rexit_status_ = (c); exit(rexit_status_); return rexit_status_; } while(0)
+
+/* define default options if no options are supplied by the user */
+static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT;
+
+/* Attach a new process to the running container `name` (reached through
+ * `lxcpath`) and let it run exec_function(exec_payload).
+ *
+ * options may be NULL, in which case the built-in defaults are used. If
+ * options->namespaces is -1 it is replaced by the clone flags reported
+ * by lxc_cmd_get_clone_flags().
+ *
+ * On success 0 is returned and the pid of the attached process is stored
+ * in *attached_process; that process is a direct child of the caller
+ * (CLONE_PARENT), so the caller can wait for it. On failure -1 is
+ * returned. In the intermediate subprocess this function never returns,
+ * it always leaves through rexit().
+ */
+int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_function, void* exec_payload, lxc_attach_options_t* options, pid_t* attached_process)
+{
+	int ret, status;
+	pid_t init_pid, pid, attached_pid;
+	struct lxc_proc_context_info *init_ctx;
+	char *cwd;
+	char *new_cwd;
+	int ipc_sockets[2];
+	lxc_attach_options_t default_options;
+
+	/* work on a private copy of the defaults: options->namespaces may be
+	 * filled in below, and writing that into the shared static object
+	 * would carry one container's namespace flags over into later calls
+	 * (and race with other threads doing the same)
+	 */
+	if (!options) {
+		default_options = attach_static_default_options;
+		options = &default_options;
+	}
+
+	init_pid = lxc_cmd_get_init_pid(name, lxcpath);
+	if (init_pid < 0) {
+		ERROR("failed to get the init pid");
+		return -1;
+	}
+
+	init_ctx = lxc_proc_get_context_info(init_pid);
+	if (!init_ctx) {
+		ERROR("failed to get context of the init process, pid = %ld", (long)init_pid);
+		return -1;
+	}
+
+	/* may be NULL if the current directory cannot be determined */
+	cwd = getcwd(NULL, 0);
+
+	/* determine which namespaces the container was created with
+	 * by asking lxc-start, if necessary
+	 */
+	if (options->namespaces == -1) {
+		options->namespaces = lxc_cmd_get_clone_flags(name, lxcpath);
+		/* call failed */
+		if (options->namespaces == -1) {
+			ERROR("failed to automatically determine the "
+			      "namespaces which the container unshared");
+			free(cwd);
+			free(init_ctx->aa_profile);
+			free(init_ctx);
+			return -1;
+		}
+	}
+
+	/* create a socket pair for IPC communication; set SOCK_CLOEXEC in order
+	 * to make sure we don't irritate other threads that want to fork+exec away
+	 *
+	 * IMPORTANT: if the initial process is multithreaded and another call
+	 * just fork()s away without exec'ing directly after, the socket fd will
+	 * exist in the forked process from the other thread and any close() in
+	 * our own child process will not really cause the socket to close properly,
+	 * potentially causing the parent to hang.
+	 *
+	 * For this reason, while IPC is still active, we have to use shutdown()
+	 * if the child exits prematurely in order to signal that the socket
+	 * is closed and cannot assume that the child exiting will automatically
+	 * do that.
+	 *
+	 * IPC mechanism: (X is receiver)
+	 *   initial process        intermediate          attached
+	 *        X           <---  send pid of
+	 *                          attached proc,
+	 *                          then exit
+	 *    send 0 ------------------------------------>    X
+	 *                                              [do initialization]
+	 *        X  <------------------------------------  send 1
+	 *   [add to cgroup, ...]
+	 *    send 2 ------------------------------------>    X
+	 *   close socket                                 close socket
+	 *                                                run program
+	 */
+	ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	if (ret < 0) {
+		SYSERROR("could not set up required IPC mechanism for attaching");
+		free(cwd);
+		free(init_ctx->aa_profile);
+		free(init_ctx);
+		return -1;
+	}
+
+	/* create intermediate subprocess, three reasons:
+	 *   1. runs all pthread_atfork handlers and the
+	 *      child will no longer be threaded
+	 *      (we can't properly setns() in a threaded process)
+	 *   2. we can't setns() in the child itself, since
+	 *      we want to make sure we are properly attached to
+	 *      the pidns
+	 *   3. also, the initial thread has to put the attached
+	 *      process into the cgroup, which we can only do if
+	 *      we didn't already setns() (otherwise, user
+	 *      namespaces will hate us)
+	 */
+	pid = fork();
+
+	if (pid < 0) {
+		/* log first so that close() cannot clobber errno */
+		SYSERROR("failed to create first subprocess");
+		/* nobody else holds the sockets yet, don't leak them */
+		close(ipc_sockets[0]);
+		close(ipc_sockets[1]);
+		free(cwd);
+		free(init_ctx->aa_profile);
+		free(init_ctx);
+		return -1;
+	}
+
+	if (pid) {
+		pid_t to_cleanup_pid = pid;
+		int expected = 0;
+
+		/* initial thread, we close the socket that is for the
+		 * subprocesses
+		 */
+		close(ipc_sockets[1]);
+		free(cwd);
+
+		/* get pid from intermediate process */
+		ret = lxc_read_nointr_expect(ipc_sockets[0], &attached_pid, sizeof(attached_pid), NULL);
+		if (ret <= 0) {
+			/* ret == 0: peer shut the socket down, stay quiet here */
+			if (ret != 0)
+				ERROR("error using IPC to receive pid of attached process");
+			goto cleanup_error;
+		}
+
+		/* reap intermediate process */
+		ret = wait_for_pid(pid);
+		if (ret < 0)
+			goto cleanup_error;
+
+		/* we will always have to reap the grandchild now */
+		to_cleanup_pid = attached_pid;
+
+		/* tell attached process it may start initializing */
+		status = 0;
+		ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
+		if (ret != sizeof(status)) {
+			ERROR("error using IPC to notify attached process for initialization (0)");
+			goto cleanup_error;
+		}
+
+		/* wait for the attached process to finish initializing */
+		expected = 1;
+		ret = lxc_read_nointr_expect(ipc_sockets[0], &status, sizeof(status), &expected);
+		if (ret <= 0) {
+			if (ret != 0)
+				ERROR("error using IPC to receive notification from attached process (1)");
+			goto cleanup_error;
+		}
+
+		/* attach to cgroup, if requested */
+		if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
+			ret = lxc_cgroup_attach(attached_pid, name, lxcpath);
+			if (ret < 0) {
+				ERROR("could not move attached process %ld to cgroup of container", (long)attached_pid);
+				goto cleanup_error;
+			}
+		}
+
+		/* tell attached process we're done */
+		status = 2;
+		ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
+		if (ret != sizeof(status)) {
+			ERROR("error using IPC to notify attached process for initialization (2)");
+			goto cleanup_error;
+		}
+
+		/* now shut down communication with child, we're done */
+		shutdown(ipc_sockets[0], SHUT_RDWR);
+		close(ipc_sockets[0]);
+		free(init_ctx->aa_profile);
+		free(init_ctx);
+
+		/* we're done, the child process should now execute whatever
+		 * it is that the user requested. The parent can now track it
+		 * with waitpid() or similar.
+		 */
+
+		*attached_process = attached_pid;
+		return 0;
+
+	cleanup_error:
+		/* first shut down the socket, then wait for the pid,
+		 * otherwise the pid we're waiting for may never exit
+		 */
+		shutdown(ipc_sockets[0], SHUT_RDWR);
+		close(ipc_sockets[0]);
+		if (to_cleanup_pid)
+			(void) wait_for_pid(to_cleanup_pid);
+		free(init_ctx->aa_profile);
+		free(init_ctx);
+		return -1;
+	}
+
+	/* first subprocess begins here, we close the socket that is for the
+	 * initial thread
+	 */
+	close(ipc_sockets[0]);
+
+	/* attach now, create another subprocess later, since pid namespaces
+	 * only really affect the children of the current process
+	 */
+	ret = lxc_attach_to_ns(init_pid, options->namespaces);
+	if (ret < 0) {
+		ERROR("failed to enter the namespace");
+		shutdown(ipc_sockets[1], SHUT_RDWR);
+		rexit(-1);
+	}
+
+	/* attach succeeded, try to cwd; skip this when there is no
+	 * directory to change to (no initial_cwd and getcwd() failed)
+	 */
+	if (options->initial_cwd)
+		new_cwd = options->initial_cwd;
+	else
+		new_cwd = cwd;
+	if (new_cwd) {
+		ret = chdir(new_cwd);
+		if (ret < 0)
+			WARN("could not change directory to '%s'", new_cwd);
+	}
+	free(cwd);
+
+	/* now create the real child process */
+	{
+		struct attach_clone_payload payload = {
+			.ipc_socket = ipc_sockets[1],
+			.options = options,
+			.init_ctx = init_ctx,
+			.exec_function = exec_function,
+			.exec_payload = exec_payload
+		};
+		/* We use clone_parent here to make this subprocess a direct child of
+		 * the initial process. Then this intermediate process can exit and
+		 * the parent can directly track the attached process.
+		 */
+		pid = lxc_clone(attach_child_main, &payload, CLONE_PARENT);
+	}
+
+	/* shouldn't happen, clone() should always return positive pid */
+	if (pid <= 0) {
+		SYSERROR("failed to create subprocess");
+		shutdown(ipc_sockets[1], SHUT_RDWR);
+		rexit(-1);
+	}
+
+	/* tell grandparent the pid of the newly created child */
+	ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid));
+	if (ret != sizeof(pid)) {
+		/* if this really happens here, this is very unfortunate, since the
+		 * parent will not know the pid of the attached process and will
+		 * not be able to wait for it (and we won't either due to CLONE_PARENT)
+		 * so the parent won't be able to reap it and the attached process
+		 * will remain a zombie
+		 */
+		ERROR("error using IPC to notify main process of pid of the attached process");
+		shutdown(ipc_sockets[1], SHUT_RDWR);
+		rexit(-1);
+	}
+
+	/* the rest is in the hands of the initial and the attached process */
+	rexit(0);
+}
+
+/* Body of the attached process; started via lxc_clone() from the
+ * intermediate process in lxc_attach(). `data` points to a
+ * struct attach_clone_payload.
+ *
+ * Sequence: wait for message 0 from the initial process, apply the
+ * requested setup (apparmor, /proc and /sys remount, personality,
+ * capability drop, environment, uid/gid), send message 1, wait for
+ * message 2, close the IPC socket, arrange stdin/stdout/stderr and
+ * finally call the user supplied exec function.
+ *
+ * Does not return to its caller: every path ends in rexit(). On any
+ * failure before the socket is closed, the socket is shut down first so
+ * the initial process notices even if other processes still hold the fd.
+ */
+static int attach_child_main(void* data)
+{
+	struct attach_clone_payload *payload = (struct attach_clone_payload *)data;
+	int ipc_socket = payload->ipc_socket;
+	lxc_attach_options_t *options = payload->options;
+	struct lxc_proc_context_info *init_ctx = payload->init_ctx;
+#if HAVE_SYS_PERSONALITY_H
+	long new_personality;
+#endif
+	int ret;
+	int status;
+	int expected;
+	int flags;
+	int fd;
+	uid_t new_uid;
+	gid_t new_gid;
+
+	/* wait for the initial thread to signal us that it's ready
+	 * for us to start initializing
+	 */
+	expected = 0;
+	status = -1;
+	ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
+	if (ret <= 0) {
+		ERROR("error using IPC to receive notification from initial process (0)");
+		shutdown(ipc_socket, SHUT_RDWR);
+		rexit(-1);
+	}
+
+	/* load apparmor profile */
+	if ((options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_APPARMOR)) {
+		ret = attach_apparmor(init_ctx->aa_profile);
+		if (ret < 0) {
+			shutdown(ipc_socket, SHUT_RDWR);
+			rexit(-1);
+		}
+	}
+
+	/* A description of the purpose of this functionality is
+	 * provided in the lxc-attach(1) manual page. We have to
+	 * remount here and not in the parent process, otherwise
+	 * /proc may not properly reflect the new pid namespace.
+	 */
+	if (!(options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) {
+		ret = lxc_attach_remount_sys_proc();
+		if (ret < 0) {
+			shutdown(ipc_socket, SHUT_RDWR);
+			rexit(-1);
+		}
+	}
+
+	/* now perform additional attachments */
+#if HAVE_SYS_PERSONALITY_H
+	/* a negative value means: use the personality of the container's init */
+	if (options->personality < 0)
+		new_personality = init_ctx->personality;
+	else
+		new_personality = options->personality;
+
+	if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) {
+		ret = personality(new_personality);
+		if (ret < 0) {
+			SYSERROR("could not ensure correct architecture");
+			shutdown(ipc_socket, SHUT_RDWR);
+			rexit(-1);
+		}
+	}
+#endif
+
+	if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) {
+		ret = lxc_attach_drop_privs(init_ctx);
+		if (ret < 0) {
+			ERROR("could not drop privileges");
+			shutdown(ipc_socket, SHUT_RDWR);
+			rexit(-1);
+		}
+	}
+
+	/* always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL) if you want this to be a no-op) */
+	ret = lxc_attach_set_environment(options->env_policy, options->extra_env_vars, options->extra_keep_env);
+	if (ret < 0) {
+		ERROR("could not set initial environment for attached process");
+		shutdown(ipc_socket, SHUT_RDWR);
+		rexit(-1);
+	}
+
+	/* set user / group id */
+	new_uid = 0;
+	new_gid = 0;
+	/* ignore errors, we will fall back to root in that case
+	 * (/proc was not mounted etc.)
+	 */
+	if (options->namespaces & CLONE_NEWUSER)
+		lxc_attach_get_init_uidgid(&new_uid, &new_gid);
+
+	/* explicit ids from the options win over the detected ones */
+	if (options->uid != (uid_t)-1)
+		new_uid = options->uid;
+	if (options->gid != (gid_t)-1)
+		new_gid = options->gid;
+
+	/* try to set the uid/gid combination; group first, since after
+	 * giving up the uid we may no longer be allowed to change the gid
+	 */
+	if ((new_gid != 0 || options->namespaces & CLONE_NEWUSER) && setgid(new_gid)) {
+		SYSERROR("switching to container gid");
+		shutdown(ipc_socket, SHUT_RDWR);
+		rexit(-1);
+	}
+	if ((new_uid != 0 || options->namespaces & CLONE_NEWUSER) && setuid(new_uid)) {
+		SYSERROR("switching to container uid");
+		shutdown(ipc_socket, SHUT_RDWR);
+		rexit(-1);
+	}
+
+	/* tell initial process it may now put us into the cgroups */
+	status = 1;
+	ret = lxc_write_nointr(ipc_socket, &status, sizeof(status));
+	if (ret != sizeof(status)) {
+		ERROR("error using IPC to notify initial process for initialization (1)");
+		shutdown(ipc_socket, SHUT_RDWR);
+		rexit(-1);
+	}
+
+	/* wait for the initial thread to signal us that it has done
+	 * everything for us when it comes to cgroups etc.
+	 */
+	expected = 2;
+	status = -1;
+	ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
+	if (ret <= 0) {
+		ERROR("error using IPC to receive final notification from initial process (2)");
+		shutdown(ipc_socket, SHUT_RDWR);
+		rexit(-1);
+	}
+
+	shutdown(ipc_socket, SHUT_RDWR);
+	close(ipc_socket);
+	free(init_ctx->aa_profile);
+	free(init_ctx);
+
+	/* The following is done after the communication socket is
+	 * shut down. That way, all errors that might (though
+	 * unlikely) occur up until this point will have their messages
+	 * printed to the original stderr (if logging is so configured)
+	 * and not the fd the user supplied, if any.
+	 */
+
+	/* fd handling for stdin, stdout and stderr;
+	 * ignore errors here, user may want to make sure
+	 * the fds are closed, for example */
+	if (options->stdin_fd >= 0 && options->stdin_fd != 0)
+		dup2(options->stdin_fd, 0);
+	if (options->stdout_fd >= 0 && options->stdout_fd != 1)
+		dup2(options->stdout_fd, 1);
+	if (options->stderr_fd >= 0 && options->stderr_fd != 2)
+		dup2(options->stderr_fd, 2);
+
+	/* close the old fds */
+	if (options->stdin_fd > 2)
+		close(options->stdin_fd);
+	if (options->stdout_fd > 2)
+		close(options->stdout_fd);
+	if (options->stderr_fd > 2)
+		close(options->stderr_fd);
+
+	/* try to remove CLOEXEC flag from stdin/stdout/stderr,
+	 * but also here, ignore errors.
+	 * FD_CLOEXEC is a file descriptor flag, so it has to be read and
+	 * written with F_GETFD/F_SETFD; F_GETFL/F_SETFL deal with the file
+	 * status flags (O_APPEND, O_NONBLOCK, ...) and never touch it.
+	 */
+	for (fd = 0; fd <= 2; fd++) {
+		flags = fcntl(fd, F_GETFD);
+		if (flags < 0)
+			continue;
+		if (flags & FD_CLOEXEC)
+			fcntl(fd, F_SETFD, flags & ~FD_CLOEXEC);
+	}
+
+	/* we're done, so we can now do whatever the user intended us to do */
+	rexit(payload->exec_function(payload->exec_payload));
+}
+
+/* Exec function for lxc_attach(): payload points to an
+ * lxc_attach_command_t naming the program and its argument vector.
+ * Replaces the process image on success; returns -1 after logging
+ * if the exec failed.
+ */
+int lxc_attach_run_command(void* payload)
+{
+	lxc_attach_command_t *command = payload;
+
+	/* execvp() only comes back to us if it could not start the program */
+	(void) execvp(command->program, command->argv);
+
+	SYSERROR("failed to exec '%s'", command->program);
+	return -1;
+}
+
+/* Exec function for lxc_attach(): start the login shell of the current
+ * user, falling back to /bin/sh. The payload argument is ignored.
+ * Replaces the process image on success; returns -1 after logging if
+ * no shell could be executed.
+ */
+int lxc_attach_run_shell(void* payload)
+{
+	uid_t uid;
+	struct passwd *passwd;
+	char *user_shell;
+
+	/* ignore payload parameter */
+	(void)payload;
+
+	uid = getuid();
+	passwd = getpwuid(uid);
+
+	/* this probably happens because of incompatible nss
+	 * implementations in host and container (remember, this
+	 * code is still using the host's glibc but our mount
+	 * namespace is in the container)
+	 * we may try to get the information by spawning a
+	 * [getent passwd uid] process and parsing the result
+	 */
+	if (!passwd)
+		user_shell = lxc_attach_getpwshell(uid);
+	else
+		user_shell = passwd->pw_shell;
+
+	/* the argument list of execlp() is variadic, so the terminating
+	 * null pointer has to be passed with its real type: a bare NULL
+	 * may be a plain int 0 and then has the wrong size on LP64
+	 */
+	if (user_shell)
+		execlp(user_shell, user_shell, (char *)NULL);
+
+	/* executed if either no passwd entry or execlp fails,
+	 * we will fall back on /bin/sh as a default shell
+	 */
+	execlp("/bin/sh", "/bin/sh", (char *)NULL);
+	SYSERROR("failed to exec shell");
+	return -1;
+}
#define _attach_h
#include <sys/types.h>
+#include "attach_options.h"
struct lxc_proc_context_info {
char *aa_profile;
extern struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid);
-typedef enum lxc_attach_env_policy_t {
- LXC_ATTACH_KEEP_ENV,
- LXC_ATTACH_CLEAR_ENV
-} lxc_attach_env_policy_t;
-
extern int lxc_attach_to_ns(pid_t other_pid, int which);
extern int lxc_attach_remount_sys_proc();
extern int lxc_attach_drop_privs(struct lxc_proc_context_info *ctx);
extern void lxc_attach_get_init_uidgid(uid_t* init_uid, gid_t* init_gid);
+/* Attach a new process to the running container `name` (contacted via
+ * `lxcpath`) and have it call exec_function(exec_payload).
+ * options may be NULL to use the defaults (LXC_ATTACH_OPTIONS_DEFAULT);
+ * if options->namespaces is -1 it is overwritten with the clone flags
+ * queried from the container.
+ * Returns 0 on success and stores the pid of the attached process in
+ * *attached_process; that process is created as a direct child of the
+ * caller, which can track it with waitpid() or similar.
+ * Returns -1 on failure.
+ */
+extern int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_function, void* exec_payload, lxc_attach_options_t* options, pid_t* attached_process);
+
#endif
--- /dev/null
+/*
+ * lxc: linux Container library
+ *
+ * (C) Copyright IBM Corp. 2007, 2008
+ *
+ * Authors:
+ * Daniel Lezcano <daniel.lezcano at free.fr>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LXC_ATTACH_OPTIONS_H
+#define _LXC_ATTACH_OPTIONS_H
+
+#include <sys/types.h>
+
+/* Environment policy for the attached process; handed to
+ * lxc_attach_set_environment() as its first argument.
+ * NOTE(review): judging by the names, KEEP_ENV retains the caller's
+ * environment and CLEAR_ENV starts from an empty one - confirm against
+ * lxc_attach_set_environment().
+ */
+typedef enum lxc_attach_env_policy_t {
+ LXC_ATTACH_KEEP_ENV,
+ LXC_ATTACH_CLEAR_ENV
+} lxc_attach_env_policy_t;
+
+/* Bit flags for lxc_attach_options_t.attach_flags */
+enum {
+ /* the following are on by default: */
+ /* initial process moves the attached process into the container's cgroup */
+ LXC_ATTACH_MOVE_TO_CGROUP = 0x00000001,
+ /* attached process calls lxc_attach_drop_privs() */
+ LXC_ATTACH_DROP_CAPABILITIES = 0x00000002,
+ /* attached process calls personality() (only if built with personality support) */
+ LXC_ATTACH_SET_PERSONALITY = 0x00000004,
+ /* attached process switches to init's apparmor profile (only if CLONE_NEWNS is attached) */
+ LXC_ATTACH_APPARMOR = 0x00000008,
+
+ /* the following are off by default */
+ /* remount /proc and /sys (only if CLONE_NEWNS is NOT attached) */
+ LXC_ATTACH_REMOUNT_PROC_SYS = 0x00010000,
+
+ /* we have 16 bits for things that are on by default
+ * and 16 bits that are off by default, that should
+ * be sufficient to keep binary compatibility for
+ * a while
+ */
+ LXC_ATTACH_DEFAULT = 0x0000FFFF
+};
+
+typedef struct lxc_attach_options_t lxc_attach_options_t;
+/* function run by the attached process; its return value becomes the
+ * exit status of that process if the function returns
+ */
+typedef int (*lxc_attach_exec_t)(void* payload);
+
+/* Options controlling lxc_attach(). Initialize with
+ * LXC_ATTACH_OPTIONS_DEFAULT (positional initializer, so the field
+ * order below must not change) and override individual fields.
+ */
+struct lxc_attach_options_t {
+ /* any combination of the above enum */
+ int attach_flags;
+ /* the namespaces to attach to (CLONE_NEW... flags),
+ * -1 to query them from the running container
+ * (lxc_attach() then stores the result in this field) */
+ int namespaces;
+ /* initial personality, -1 to autodetect
+ * (may be ignored if lxc is compiled w/o personality support) */
+ long personality;
+
+ /* initial current directory, use NULL to use cwd
+ * (might not exist in container, then / will be
+ * used because of kernel defaults)
+ */
+ char* initial_cwd;
+
+ /* the uid and gid to attach to,
+ * -1 for default (init uid/gid for userns containers,
+ * otherwise or if detection fails 0/0)
+ */
+ uid_t uid;
+ gid_t gid;
+
+ /* environment handling; all three are passed on to
+ * lxc_attach_set_environment() */
+ lxc_attach_env_policy_t env_policy;
+ char** extra_env_vars;
+ char** extra_keep_env;
+
+ /* file descriptors for stdin, stdout and stderr,
+ * dup2() will be used before calling exec_function,
+ * (assuming not 0, 1 and 2 are specified) and the
+ * original fds are closed before passing control
+ * over. Any O_CLOEXEC flag will be removed after
+ * that. A negative value skips the dup2() for that fd.
+ */
+ int stdin_fd;
+ int stdout_fd;
+ int stderr_fd;
+};
+
+/* Default initializer for lxc_attach_options_t. The values are positional
+ * (no designated initializers), so they must stay in the same order as
+ * the fields of the struct.
+ */
+#define LXC_ATTACH_OPTIONS_DEFAULT \
+ { \
+ /* .attach_flags = */ LXC_ATTACH_DEFAULT, \
+ /* .namespaces = */ -1, \
+ /* .personality = */ -1, \
+ /* .initial_cwd = */ NULL, \
+ /* .uid = */ (uid_t)-1, \
+ /* .gid = */ (gid_t)-1, \
+ /* .env_policy = */ LXC_ATTACH_KEEP_ENV, \
+ /* .extra_env_vars = */ NULL, \
+ /* .extra_keep_env = */ NULL, \
+ /* .stdin_fd, .stdout_fd, .stderr_fd = */ 0, 1, 2 \
+ }
+
+/* Payload type expected by lxc_attach_run_command() */
+typedef struct lxc_attach_command_t {
+ char* program; /* the program to run (passed to execvp) */
+ char** argv; /* the argv pointer of that program, including the program itself in argv[0] */
+} lxc_attach_command_t;
+
+/* default execution functions (usable as exec_function of lxc_attach()):
+ * run_command: payload is a pointer to lxc_attach_command_t; the
+ * program is started with execvp()
+ * run_shell: no payload, will be ignored; starts the shell from the
+ * user's passwd entry, falling back to /bin/sh
+ * Both only return (with -1) if the exec failed.
+ */
+extern int lxc_attach_run_command(void* payload);
+extern int lxc_attach_run_shell(void* payload);
+
+#endif
*/
#define _GNU_SOURCE
-#include <unistd.h>
-#include <errno.h>
-#include <pwd.h>
-#include <stdlib.h>
-#include <sys/param.h>
-#include <sys/types.h>
-#include <sys/socket.h>
#include <sys/wait.h>
+#include <sys/types.h>
#include "attach.h"
-#include "commands.h"
#include "arguments.h"
-#include "caps.h"
-#include "cgroup.h"
#include "config.h"
#include "confile.h"
-#include "start.h"
-#include "sync.h"
-#include "log.h"
#include "namespace.h"
-#include "apparmor.h"
-
-#if HAVE_SYS_PERSONALITY_H
-#include <sys/personality.h>
-#endif
+#include "caps.h"
+#include "log.h"
+#include "utils.h"
lxc_log_define(lxc_attach_ui, lxc);
.checker = NULL,
};
-struct child_data {
- struct lxc_proc_context_info *init_ctx;
- struct lxc_handler *handler;
- int ipc_socket;
-};
-
-static int child_main(void* data)
-{
- struct child_data* child_data = data;
- struct lxc_proc_context_info *init_ctx = child_data->init_ctx;
- struct lxc_handler *handler = child_data->handler;
- int ipc_socket = child_data->ipc_socket;
- struct passwd *passwd;
- char *user_shell;
- uid_t uid;
- int ret;
-
- lxc_sync_fini_parent(handler);
- close(ipc_socket);
-
- if ((namespace_flags & CLONE_NEWNS)) {
- if (attach_apparmor(init_ctx->aa_profile) < 0) {
- ERROR("failed switching apparmor profiles");
- return -1;
- }
- }
-
- /* A description of the purpose of this functionality is
- * provided in the lxc-attach(1) manual page. We have to
- * remount here and not in the parent process, otherwise
- * /proc may not properly reflect the new pid namespace.
- */
- if (!(namespace_flags & CLONE_NEWNS) && remount_sys_proc) {
- ret = lxc_attach_remount_sys_proc();
- if (ret < 0) {
- return -1;
- }
- }
-
-#if HAVE_SYS_PERSONALITY_H
- if (new_personality < 0)
- new_personality = init_ctx->personality;
-
- if (personality(new_personality) == -1) {
- ERROR("could not ensure correct architecture: %s",
- strerror(errno));
- return -1;
- }
-#endif
-
- if (!elevated_privileges && lxc_attach_drop_privs(init_ctx)) {
- ERROR("could not drop privileges");
- return -1;
- }
-
- if (lxc_attach_set_environment(env_policy, NULL, NULL)) {
- ERROR("could not set environment");
- return -1;
- }
-
- /* tell parent we are done setting up the container and wait
- * until we have been put in the container's cgroup, if
- * applicable */
- if (lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE))
- return -1;
-
- lxc_sync_fini(handler);
-
- if (namespace_flags & CLONE_NEWUSER) {
- uid_t init_uid = 0;
- gid_t init_gid = 0;
-
- /* ignore errors, we will fall back to root in that case
- * (/proc was not mounted etc.)
- */
- lxc_attach_get_init_uidgid(&init_uid, &init_gid);
-
- /* try to set the uid/gid combination */
- if (setgid(init_gid)) {
- SYSERROR("switching to container gid");
- return -1;
- }
- if (setuid(init_uid)) {
- SYSERROR("switching to container uid");
- return -1;
- }
- }
-
- if (my_args.argc) {
- execvp(my_args.argv[0], my_args.argv);
- SYSERROR("failed to exec '%s'", my_args.argv[0]);
- return -1;
- }
-
- uid = getuid();
-
- passwd = getpwuid(uid);
-
- /* this probably happens because of incompatible nss
- * implementations in host and container (remember, this
- * code is still using the host's glibc but our mount
- * namespace is in the container)
- * we may try to get the information by spawning a
- * [getent passwd uid] process and parsing the result
- */
- if (!passwd)
- user_shell = lxc_attach_getpwshell(uid);
- else
- user_shell = passwd->pw_shell;
-
- if (user_shell) {
- char *const args[] = {
- user_shell,
- NULL,
- };
-
- (void) execvp(args[0], args);
- }
-
- /* executed if either no passwd entry or execvp fails,
- * we will fall back on /bin/sh as a default shell
- */
- {
- char *const args[] = {
- "/bin/sh",
- NULL,
- };
-
- execvp(args[0], args);
- SYSERROR("failed to exec '%s'", args[0]);
- return -1;
- }
-}
-
int main(int argc, char *argv[])
{
int ret;
- pid_t pid, init_pid;
- struct lxc_proc_context_info *init_ctx;
- struct lxc_handler *handler;
- char *curdir;
- int cgroup_ipc_sockets[2];
+ pid_t pid;
+ lxc_attach_options_t attach_options = LXC_ATTACH_OPTIONS_DEFAULT;
+ lxc_attach_command_t command;
ret = lxc_caps_init();
if (ret)
if (ret)
return ret;
- init_pid = lxc_cmd_get_init_pid(my_args.name, my_args.lxcpath[0]);
- if (init_pid < 0) {
- ERROR("failed to get the init pid");
- return -1;
- }
+ if (remount_sys_proc)
+ attach_options.attach_flags |= LXC_ATTACH_REMOUNT_PROC_SYS;
+ if (elevated_privileges)
+ attach_options.attach_flags &= ~(LXC_ATTACH_MOVE_TO_CGROUP | LXC_ATTACH_DROP_CAPABILITIES | LXC_ATTACH_APPARMOR);
+ attach_options.namespaces = namespace_flags;
+ attach_options.personality = new_personality;
+ attach_options.env_policy = env_policy;
- init_ctx = lxc_proc_get_context_info(init_pid);
- if (!init_ctx) {
- ERROR("failed to get context of the init process, pid = %d", init_pid);
- return -1;
- }
-
- curdir = getcwd(NULL, 0);
-
- /* determine which namespaces the container was created with
- * by asking lxc-start
- */
- if (namespace_flags == -1) {
- namespace_flags = lxc_cmd_get_clone_flags(my_args.name, my_args.lxcpath[0]);
- /* call failed */
- if (namespace_flags == -1) {
- ERROR("failed to automatically determine the "
- "namespaces which the container unshared");
- return -1;
- }
- }
-
- /* For the cgroup attaching logic to work in conjunction with pid and user namespaces,
- * we need to have the following hierarchy:
- *
- * lxc-attach [process executed externally]
- * | socketpair(cgroup_ipc_sockets)
- * | fork() -> child
- * | | setns()
- * | | fork() -> grandchild
- * | | | initialize
- * | | | signal parent
- * | |<------------------|----+
- * | | signal parent |
- * |<----------------------|-----+ |
- * | add to cgroups | |
- * | signal child -------->| |
- * | | signal child ---->|
- * | waitpid() | waitpid() | exec()
- * | |<------------------| exit()
- * |<----------------------| exit()
- * | exit()
- *
- * The rationale is the following: The first parent is needed because after
- * setns() (mount + user namespace) we can't access the cgroup filesystem
- * to add the pid to the corresponding cgroup. Therefore, we need to do that
- * in a process executed on the host, so that's why we need to fork and wait
- * for it to have done some initialization (cgroups may restrict certain
- * operations so we have to do that in the end) and use IPC for signaling.
- *
- * Then in the child process we do the setns(). However, a process is never
- * really attached to a pid namespace (never changes its pid, doesn't appear
- * in the pid namespace /proc), only child processes of that process are
- * truely inside the new pid namespace. That's why we need to fork() again
- * after setns() before performing final initializations, then signal our
- * parent, which signals the primary process, which does cgroup adding,
- * which then signals to the grandchild that it can exec().
- */
- ret = socketpair(PF_LOCAL, SOCK_STREAM, 0, cgroup_ipc_sockets);
- if (ret < 0) {
- SYSERROR("could not set up required IPC mechanism for attaching");
- return -1;
- }
-
- pid = fork();
- if (pid < 0) {
- SYSERROR("failed to create first subprocess");
- return -1;
- }
-
- if (pid) {
- int status;
- pid_t grandchild;
-
- close(cgroup_ipc_sockets[1]);
-
- gparent_reread:
- ret = read(cgroup_ipc_sockets[0], &grandchild, sizeof(grandchild));
- if (ret <= 0) {
- if (ret < 0 && (errno == EAGAIN || errno == EINTR))
- goto gparent_reread;
- ERROR("failed to get pid of attached process to add to cgroup");
- return -1;
- }
-
- if (!elevated_privileges) {
- ret = lxc_cgroup_attach(grandchild, my_args.name, my_args.lxcpath[0]);
- if (ret < 0) {
- ERROR("failed to attach process to cgroup");
- return -1;
- }
- }
-
- status = 0;
- ret = write(cgroup_ipc_sockets[0], &status, sizeof(status));
- if (ret <= 0) {
- ERROR("failed to signal child that cgroup logic has finished");
- return -1;
- }
-
- close(cgroup_ipc_sockets[0]);
-
- gparent_again:
- ret = waitpid(pid, &status, 0);
- if (ret < 0) {
- if (errno == EINTR)
- goto gparent_again;
- SYSERROR("failed to wait for process '%d'", pid);
- return -1;
- }
-
- if (WIFEXITED(status))
- return WEXITSTATUS(status);
-
- return -1;
- }
-
- /* at this point we are in the 'parent' process so we need to close the
- * socket reserved for the 'grandparent' process
- */
- close(cgroup_ipc_sockets[0]);
-
- /* we need to attach before we fork since certain namespaces
- * (such as pid namespaces) only really affect children of the
- * current process and not the process itself
- */
- ret = lxc_attach_to_ns(init_pid, namespace_flags);
- if (ret < 0) {
- ERROR("failed to enter the namespace");
- return -1;
+ if (my_args.argc) {
+ command.program = my_args.argv[0];
+ command.argv = (char**)my_args.argv;
+ ret = lxc_attach(my_args.name, my_args.lxcpath[0], lxc_attach_run_command, &command, &attach_options, &pid);
+ } else {
+ ret = lxc_attach(my_args.name, my_args.lxcpath[0], lxc_attach_run_shell, NULL, &attach_options, &pid);
}
- if (curdir && chdir(curdir))
- WARN("could not change directory to '%s'", curdir);
-
- free(curdir);
-
- /* hack: we need sync.h infrastructure - and that needs a handler
- * FIXME: perhaps we should also just use a very simple socketpair()
- * here? - like with the grandparent <-> parent communication?
- */
- handler = calloc(1, sizeof(*handler));
-
- if (lxc_sync_init(handler)) {
- ERROR("failed to initialize synchronization socket");
+ if (ret < 0)
return -1;
- }
- {
- struct child_data child_data = {
- .init_ctx = init_ctx,
- .handler = handler,
- .ipc_socket = cgroup_ipc_sockets[1]
- };
- pid = lxc_clone(child_main, &child_data, 0);
- }
-
- if (pid < 0) {
- SYSERROR("failed to fork");
+ ret = lxc_wait_for_pid_status(pid);
+ if (ret < 0)
return -1;
- }
-
- if (pid) {
- int status;
-
- lxc_sync_fini_child(handler);
-
- /* wait until the child has done configuring itself before
- * we put it in a cgroup that potentially limits these
- * possibilities */
- if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
- return -1;
-
- /* ask grandparent to add child to cgroups, the grandparent will
- * itself check whether that's actually necessary
- */
- ret = write(cgroup_ipc_sockets[1], &pid, sizeof(pid));
- if (ret != sizeof(pid)) {
- ERROR("error using IPC to notify main process of pid to add to the cgroups of the container");
- return -1;
- }
-
- parent_reread:
- /* we need some mechanism to check whether the grandparent could
- * add us to the cgroups or not - so we await a dummy integer
- * on the same socket (that's why we don't use a pipe - we need
- * two-way communication). So if the parent fails and exits, that
- * will close the socket, which will cause a read of 0 bytes for
- * us, so we just terminate. If we read at least a byte, we don't
- * care about the contents...
- */
- ret = read(cgroup_ipc_sockets[1], &status, sizeof(status));
- if (ret <= 0) {
- if (ret < 0 && (errno == EAGAIN || errno == EINTR))
- goto parent_reread;
- /* only print someting if we can't assume the parent already
- * gave an error message, that will reduce confusion for the
- * user
- */
- if (ret != 0)
- ERROR("failed to get notification that the child process was added to the container's cgroups");
- return -1;
- }
-
- /* we don't need that IPC interface anymore */
- close(cgroup_ipc_sockets[1]);
-
- /* tell the child we are done initializing */
- if (lxc_sync_wake_child(handler, LXC_SYNC_POST_CONFIGURE))
- return -1;
- lxc_sync_fini(handler);
-
- again:
- if (waitpid(pid, &status, 0) < 0) {
- if (errno == EINTR)
- goto again;
- SYSERROR("failed to wait '%d'", pid);
- return -1;
- }
-
- if (WIFEXITED(status))
- return WEXITSTATUS(status);
-
- return -1;
- }
+ if (WIFEXITED(ret))
+ return WEXITSTATUS(ret);
- /* shouldn't happen, because clone should never return 0 */
return -1;
}
#include <errno.h>
#include <sys/types.h>
+#include <unistd.h>
#include "config.h"
/* returns 1 on success, 0 if there were any failures */