git.ipfire.org Git - thirdparty/lxc.git/commitdiff
execute: don't exec init, call it
author: Tycho Andersen <tycho@tycho.pizza>
Mon, 28 Jun 2021 14:38:48 +0000 (08:38 -0600)
committer: Christian Brauner <christian.brauner@ubuntu.com>
Tue, 29 Jun 2021 08:39:36 +0000 (10:39 +0200)
Instead of shipping a statically linked init that packaging has to place
somewhere on the host fs, and then having to either bind mount it into the
container or detect fexecve() functionality, let's just call init as a library
function. This way we don't have to do any of that.

This also fixes up a bunch of conditions from:

if (quiet)
    fprintf(stderr, "log message");

to

if (!quiet)
    fprintf(stderr, "log message");

:)

and it drops all the code for fexecve() detection and bind mounting our
init in, since we no longer need any of that.

A couple other thoughts:

* I left the lxc-init binary in since we ship it, so someone could be using
  it outside of the internal uses.
* There are lots of unused arguments to lxc-init (including presumably
  --quiet, since nobody noticed the above); those may be part of the API
  though and so we don't want to drop them.

Signed-off-by: Tycho Andersen <tycho@tycho.pizza>
src/lxc/cmd/lxc_init.c
src/lxc/conf.c
src/lxc/execute.c
src/lxc/initutils.c
src/lxc/initutils.h
src/lxc/start.h

index 3ae1bf6fae668eb3472289fd127d9e0e6050914a..cb1b348c4b1e757c613529d92abe7cf77e67df86 100644 (file)
 #include <libgen.h>
 #include <limits.h>
 #include <pthread.h>
-#include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/stat.h>
 #include <sys/types.h>
-#include <sys/wait.h>
 #include <unistd.h>
 
 #include <lxc/lxccontainer.h>
 
 #include "compiler.h"
 #include "config.h"
-#include "error.h"
 #include "initutils.h"
 #include "memory_utils.h"
 #include "parse.h"
-#include "process_utils.h"
 #include "string_utils.h"
 
 /* option keys for long only options */
 #define QUOTE(macro) #macro
 #define QUOTEVAL(macro) QUOTE(macro)
 
-static sig_atomic_t was_interrupted;
-
-static void interrupt_handler(int sig)
-{
-       if (!was_interrupted)
-               was_interrupted = sig;
-}
-
 static struct option long_options[] = {
            { "name",        required_argument, 0, 'n'         },
            { "help",        no_argument,       0, 'h'         },
@@ -75,119 +63,6 @@ static struct arguments my_args = {
        .shortopts = short_options
 };
 
-static void prevent_forking(void)
-{
-       __do_free char *line = NULL;
-       __do_fclose FILE *f = NULL;
-       char path[PATH_MAX];
-       size_t len = 0;
-
-       f = fopen("/proc/self/cgroup", "re");
-       if (!f)
-               return;
-
-       while (getline(&line, &len, f) != -1) {
-               __do_close int fd = -EBADF;
-               int ret;
-               char *p, *p2;
-
-               p = strchr(line, ':');
-               if (!p)
-                       continue;
-               p++;
-               p2 = strchr(p, ':');
-               if (!p2)
-                       continue;
-               *p2 = '\0';
-
-               /* This is a cgroup v2 entry. Skip it. */
-               if ((p2 - p) == 0)
-                       continue;
-
-               if (strcmp(p, "pids") != 0)
-                       continue;
-               p2++;
-
-               p2 += lxc_char_left_gc(p2, strlen(p2));
-               p2[lxc_char_right_gc(p2, strlen(p2))] = '\0';
-
-               ret = snprintf(path, sizeof(path),
-                              "/sys/fs/cgroup/pids/%s/pids.max", p2);
-               if (ret < 0 || (size_t)ret >= sizeof(path)) {
-                       if (my_args.quiet)
-                               fprintf(stderr, "Failed to create string\n");
-                       return;
-               }
-
-               fd = open(path, O_WRONLY | O_CLOEXEC);
-               if (fd < 0) {
-                       if (my_args.quiet)
-                               fprintf(stderr, "Failed to open \"%s\"\n", path);
-                       return;
-               }
-
-               ret = write(fd, "1", 1);
-               if (ret != 1 && !my_args.quiet)
-                       fprintf(stderr, "Failed to write to \"%s\"\n", path);
-
-               return;
-       }
-}
-
-static void kill_children(pid_t pid)
-{
-       __do_fclose FILE *f = NULL;
-       char path[PATH_MAX];
-       int ret;
-
-       ret = snprintf(path, sizeof(path), "/proc/%d/task/%d/children", pid, pid);
-       if (ret < 0 || (size_t)ret >= sizeof(path)) {
-               if (my_args.quiet)
-                       fprintf(stderr, "Failed to create string\n");
-               return;
-       }
-
-       f = fopen(path, "re");
-       if (!f) {
-               if (my_args.quiet)
-                       fprintf(stderr, "Failed to open %s\n", path);
-               return;
-       }
-
-       while (!feof(f)) {
-               pid_t find_pid;
-
-               if (fscanf(f, "%d ", &find_pid) != 1) {
-                       if (my_args.quiet)
-                               fprintf(stderr, "Failed to retrieve pid\n");
-                       return;
-               }
-
-               (void)kill_children(find_pid);
-               (void)kill(find_pid, SIGKILL);
-       }
-}
-
-static void remove_self(void)
-{
-       int ret;
-       ssize_t n;
-       char path[PATH_MAX] = {0};
-
-       n = readlink("/proc/self/exe", path, sizeof(path));
-       if (n < 0 || n >= PATH_MAX)
-               return;
-       path[n] = '\0';
-
-       ret = umount2(path, MNT_DETACH);
-       if (ret < 0)
-               return;
-
-       ret = unlink(path);
-       if (ret < 0)
-               return;
-}
-
 __noreturn static void print_usage_exit(const struct option longopts[])
 
 {
@@ -283,12 +158,6 @@ static int arguments_parse(struct arguments *args, int argc,
 
 int main(int argc, char *argv[])
 {
-       int i, logfd, ret;
-       pid_t pid;
-       struct sigaction act;
-       sigset_t mask, omask;
-       int have_status = 0, exit_with = 1, shutdown = 0;
-
        if (arguments_parse(&my_args, argc, argv))
                exit(EXIT_FAILURE);
 
@@ -298,221 +167,5 @@ int main(int argc, char *argv[])
                exit(EXIT_FAILURE);
        }
 
-       /* Mask all the signals so we are safe to install a signal handler and
-        * to fork.
-        */
-       ret = sigfillset(&mask);
-       if (ret < 0)
-               exit(EXIT_FAILURE);
-
-       ret = sigdelset(&mask, SIGILL);
-       if (ret < 0)
-               exit(EXIT_FAILURE);
-
-       ret = sigdelset(&mask, SIGSEGV);
-       if (ret < 0)
-               exit(EXIT_FAILURE);
-
-       ret = sigdelset(&mask, SIGBUS);
-       if (ret < 0)
-               exit(EXIT_FAILURE);
-
-       ret = pthread_sigmask(SIG_SETMASK, &mask, &omask);
-       if (ret < 0)
-               exit(EXIT_FAILURE);
-
-       ret = sigfillset(&act.sa_mask);
-       if (ret < 0)
-               exit(EXIT_FAILURE);
-
-       ret = sigdelset(&act.sa_mask, SIGILL);
-       if (ret < 0)
-               exit(EXIT_FAILURE);
-
-       ret = sigdelset(&act.sa_mask, SIGSEGV);
-       if (ret < 0)
-               exit(EXIT_FAILURE);
-
-       ret = sigdelset(&act.sa_mask, SIGBUS);
-       if (ret < 0)
-               exit(EXIT_FAILURE);
-
-       ret = sigdelset(&act.sa_mask, SIGSTOP);
-       if (ret < 0)
-               exit(EXIT_FAILURE);
-
-       ret = sigdelset(&act.sa_mask, SIGKILL);
-       if (ret < 0)
-               exit(EXIT_FAILURE);
-
-       act.sa_flags = 0;
-       act.sa_handler = interrupt_handler;
-
-       for (i = 1; i < NSIG; i++) {
-               /* Exclude some signals: ILL, SEGV and BUS are likely to reveal
-                * a bug and we want a core. STOP and KILL cannot be handled
-                * anyway: they're here for documentation. 32 and 33 are not
-                * defined.
-                */
-               if (i == SIGILL || i == SIGSEGV || i == SIGBUS ||
-                   i == SIGSTOP || i == SIGKILL || i == 32 || i == 33)
-                       continue;
-
-               ret = sigaction(i, &act, NULL);
-               if (ret < 0) {
-                       if (errno == EINVAL)
-                               continue;
-
-                       if (my_args.quiet)
-                               fprintf(stderr, "Failed to change signal action\n");
-                       exit(EXIT_FAILURE);
-               }
-       }
-
-       remove_self();
-
-       pid = fork();
-       if (pid < 0)
-               exit(EXIT_FAILURE);
-
-       if (!pid) {
-               /* restore default signal handlers */
-               for (i = 1; i < NSIG; i++) {
-                       sighandler_t sigerr;
-
-                       if (i == SIGILL || i == SIGSEGV || i == SIGBUS ||
-                           i == SIGSTOP || i == SIGKILL || i == 32 || i == 33)
-                               continue;
-
-                       sigerr = signal(i, SIG_DFL);
-                       if (sigerr == SIG_ERR && !my_args.quiet)
-                               fprintf(stderr, "Failed to reset to default action for signal \"%d\": %d\n", i, pid);
-               }
-
-               ret = pthread_sigmask(SIG_SETMASK, &omask, NULL);
-               if (ret < 0) {
-                       if (my_args.quiet)
-                               fprintf(stderr, "Failed to set signal mask\n");
-                       exit(EXIT_FAILURE);
-               }
-
-               (void)setsid();
-
-               (void)ioctl(STDIN_FILENO, TIOCSCTTY, 0);
-
-               ret = execvp(my_args.argv[0], my_args.argv);
-               if (my_args.quiet)
-                       fprintf(stderr, "Failed to exec \"%s\"\n", my_args.argv[0]);
-               exit(ret);
-       }
-       logfd = open("/dev/console", O_WRONLY | O_NOCTTY | O_CLOEXEC);
-       if (logfd >= 0) {
-               ret = dup3(logfd, STDERR_FILENO, O_CLOEXEC);
-               if (ret < 0)
-                       exit(EXIT_FAILURE);
-       }
-
-       (void)setproctitle("init");
-
-       /* Let's process the signals now. */
-       ret = sigdelset(&omask, SIGALRM);
-       if (ret < 0)
-               exit(EXIT_FAILURE);
-
-       ret = pthread_sigmask(SIG_SETMASK, &omask, NULL);
-       if (ret < 0) {
-               if (my_args.quiet)
-                       fprintf(stderr, "Failed to set signal mask\n");
-               exit(EXIT_FAILURE);
-       }
-
-       /* No need of other inherited fds but stderr. */
-       close(STDIN_FILENO);
-       close(STDOUT_FILENO);
-
-       for (;;) {
-               int status;
-               pid_t waited_pid;
-
-               switch (was_interrupted) {
-               case 0:
-               /* Some applications send SIGHUP in order to get init to reload
-                * its configuration. We don't want to forward this onto the
-                * application itself, because it probably isn't expecting this
-                * signal since it was expecting init to do something with it.
-                *
-                * Instead, let's explicitly ignore it here. The actual
-                * terminal case is handled in the monitor's handler, which
-                * sends this task a SIGTERM in the case of a SIGHUP, which is
-                * what we want.
-                */
-               case SIGHUP:
-                       break;
-               case SIGPWR:
-               case SIGTERM:
-                       if (!shutdown) {
-                               pid_t mypid = lxc_raw_getpid();
-
-                               shutdown = 1;
-                               prevent_forking();
-                               if (mypid != 1) {
-                                       kill_children(mypid);
-                               } else {
-                                       ret = kill(-1, SIGTERM);
-                                       if (ret < 0 && !my_args.quiet)
-                                               fprintf(stderr, "Failed to send SIGTERM to all children\n");
-                               }
-                               alarm(1);
-                       }
-                       break;
-               case SIGALRM: {
-                       pid_t mypid = lxc_raw_getpid();
-
-                       prevent_forking();
-                       if (mypid != 1) {
-                               kill_children(mypid);
-                       } else {
-                               ret = kill(-1, SIGKILL);
-                               if (ret < 0 && !my_args.quiet)
-                                       fprintf(stderr, "Failed to send SIGTERM to all children\n");
-                       }
-                       break;
-               }
-               default:
-                       kill(pid, was_interrupted);
-                       break;
-               }
-               ret = EXIT_SUCCESS;
-
-               was_interrupted = 0;
-               waited_pid = wait(&status);
-               if (waited_pid < 0) {
-                       if (errno == ECHILD)
-                               goto out;
-
-                       if (errno == EINTR)
-                               continue;
-
-                       if (my_args.quiet)
-                               fprintf(stderr, "Failed to wait on child %d\n", pid);
-                       ret = -1;
-                       goto out;
-               }
-
-               /* Reset timer each time a process exited. */
-               if (shutdown)
-                       alarm(1);
-
-               /* Keep the exit code of the started application (not wrapped
-                * pid) and continue to wait for the end of the orphan group.
-                */
-               if (waited_pid == pid && !have_status) {
-                       exit_with = lxc_error_set_and_log(waited_pid, status);
-                       have_status = 1;
-               }
-       }
-out:
-       if (ret < 0)
-               exit(EXIT_FAILURE);
-       exit(exit_with);
+       lxc_container_init(my_args.argc, my_args.argv, my_args.quiet);
 }
index aa0aa4ec66f850ba6a1af0f171a56139e49140ca..95ce867eef56e16e3081422bb3d7331a12c3c1f6 100644 (file)
@@ -3727,58 +3727,6 @@ static void turn_into_dependent_mounts(const struct lxc_rootfs *rootfs)
        TRACE("Turned all mount table entries into dependent mount");
 }
 
-static int lxc_execute_bind_init(struct lxc_handler *handler)
-{
-       int ret;
-       char *p;
-       char path[PATH_MAX], destpath[PATH_MAX];
-       struct lxc_conf *conf = handler->conf;
-
-       /* If init exists in the container, don't bind mount a static one */
-       p = choose_init(conf->rootfs.mount);
-       if (p) {
-               __do_free char *old = p;
-
-               p = strdup(old + strlen(conf->rootfs.mount));
-               if (!p)
-                       return -ENOMEM;
-
-               INFO("Found existing init at \"%s\"", p);
-               goto out;
-       }
-
-       ret = strnprintf(path, sizeof(path), SBINDIR "/init.lxc.static");
-       if (ret < 0)
-               return -1;
-
-       if (!file_exists(path))
-               return log_error_errno(-1, errno, "The file \"%s\" does not exist on host", path);
-
-       ret = strnprintf(destpath, sizeof(path), "%s" P_tmpdir "%s", conf->rootfs.mount, "/.lxc-init");
-       if (ret < 0)
-               return -1;
-
-       if (!file_exists(destpath)) {
-               ret = mknod(destpath, S_IFREG | 0000, 0);
-               if (ret < 0 && errno != EEXIST)
-                       return log_error_errno(-1, errno, "Failed to create \"%s\" file as bind mount target", destpath);
-       }
-
-       ret = safe_mount(path, destpath, "none", MS_BIND, NULL, conf->rootfs.mount);
-       if (ret < 0)
-               return log_error_errno(-1, errno, "Failed to bind mount lxc.init.static into container");
-
-       p = strdup(destpath + strlen(conf->rootfs.mount));
-       if (!p)
-               return -ENOMEM;
-
-       INFO("Bind mounted lxc.init.static into container at \"%s\"", path);
-out:
-       ((struct execute_args *)handler->data)->init_fd = -1;
-       ((struct execute_args *)handler->data)->init_path = p;
-       return 0;
-}
-
 /* This does the work of remounting / if it is shared, calling the container
  * pre-mount hooks, and mounting the rootfs.
  */
@@ -3848,15 +3796,6 @@ static bool verify_start_hooks(struct lxc_conf *conf)
        return true;
 }
 
-static bool execveat_supported(void)
-{
-       execveat(-1, "", NULL, NULL, AT_EMPTY_PATH);
-       if (errno == ENOSYS)
-               return false;
-
-       return true;
-}
-
 static int lxc_setup_boot_id(void)
 {
        int ret;
@@ -4176,28 +4115,6 @@ int lxc_setup(struct lxc_handler *handler)
        if (lxc_conf->rootfs.dfd_dev < 0 && errno != ENOENT)
                return log_error_errno(-errno, errno, "Failed to open \"/dev\"");
 
-       if (lxc_conf->is_execute) {
-               if (execveat_supported()) {
-                       int fd;
-                       char path[STRLITERALLEN(SBINDIR) + STRLITERALLEN("/init.lxc.static") + 1];
-
-                       ret = strnprintf(path, sizeof(path), SBINDIR "/init.lxc.static");
-                       if (ret < 0)
-                               return log_error(-1, "Path to init.lxc.static too long");
-
-                       fd = open(path, O_NOCTTY | O_NOFOLLOW | O_CLOEXEC | O_PATH);
-                       if (fd < 0)
-                               return log_error_errno(-1, errno, "Unable to open lxc.init.static");
-
-                       ((struct execute_args *)handler->data)->init_fd = fd;
-                       ((struct execute_args *)handler->data)->init_path = NULL;
-               } else {
-                       ret = lxc_execute_bind_init(handler);
-                       if (ret < 0)
-                               return log_error(-1, "Failed to bind-mount the lxc init system");
-               }
-       }
-
        /* Now mount only cgroups, if wanted. Before, /sys could not have been
         * mounted. It is guaranteed to be mounted now either through
         * automatically or via fstab entries.
index 7175ef2cf209561f8982bf0e5f1091c7fe3fa7d3..346e127ab49ecbcf247565dced02c87edc482a52 100644 (file)
 #include "start.h"
 #include "process_utils.h"
 #include "utils.h"
+#include "initutils.h"
 
 lxc_log_define(execute, start);
 
 static int execute_start(struct lxc_handler *handler, void* data)
 {
-       int argc_add, j;
-       char **argv;
-       int argc = 0, i = 0;
+       int argc = 0;
        struct execute_args *my_args = data;
 
        while (my_args->argv[argc++]);
 
-       /* lxc-init -n name -- [argc] NULL -> 5 */
-       argc_add = 5;
-       if (my_args->quiet)
-               argc_add++;
-
-       if (!handler->conf->rootfs.path)
-               argc_add += 2;
-
-       argv = malloc((argc + argc_add) * sizeof(*argv));
-       if (!argv) {
-               SYSERROR("Allocating init args failed");
-               goto out1;
-       }
-
-       if (my_args->init_path)
-               argv[i++] = my_args->init_path;
-       else
-               argv[i++] = "lxc-init";
-
-       argv[i++] = "-n";
-       argv[i++] = (char *)handler->name;
-
-       if (my_args->quiet)
-               argv[i++] = "--quiet";
-
-       if (!handler->conf->rootfs.path) {
-               argv[i++] = "-P";
-               argv[i++] = (char *)handler->lxcpath;
-       }
-
-       argv[i++] = "--";
-       for (j = 0; j < argc; j++)
-               argv[i++] = my_args->argv[j];
-       argv[i++] = NULL;
-
-       NOTICE("Exec'ing \"%s\"", my_args->argv[0]);
-
-       if (my_args->init_fd >= 0)
-               execveat(my_args->init_fd, "", argv, environ, AT_EMPTY_PATH);
-       else
-               execvp(argv[0], argv);
-       SYSERROR("Failed to exec %s", argv[0]);
-
-       free(argv);
-out1:
-       return 1;
+       lxc_container_init(argc, my_args->argv, my_args->quiet);
 }
 
 static int execute_post_start(struct lxc_handler *handler, void* data)
index 969d2052f70d937ee3c4db87c9186e9b14a778da..f1b012b77869bc94e3cda703ccd80760361578e2 100644 (file)
@@ -5,14 +5,18 @@
 #endif
 #include <sys/prctl.h>
 #include <sys/syscall.h>
+#include <sys/wait.h>
 #include <unistd.h>
+#include <signal.h>
 
 #include "compiler.h"
 #include "config.h"
+#include "error.h"
 #include "file_utils.h"
 #include "initutils.h"
 #include "macro.h"
 #include "memory_utils.h"
+#include "process_utils.h"
 
 #ifndef HAVE_STRLCPY
 #include "include/strlcpy.h"
@@ -308,3 +312,346 @@ int setproctitle(char *title)
 
        return ret;
 }
+
+/* Look up the "pids" controller entry in /proc/self/cgroup and write "1" to
+ * the matching /sys/fs/cgroup/pids/<cgroup>/pids.max file.
+ *
+ * Best effort: only the first matching entry is processed and every failure
+ * simply ends the function (with a message on stderr where one is printed).
+ */
+static void prevent_forking(void)
+{
+       __do_free char *buf = NULL;
+       __do_fclose FILE *cgroups = NULL;
+       char pids_max[PATH_MAX];
+       size_t buflen = 0;
+
+       cgroups = fopen("/proc/self/cgroup", "re");
+       if (!cgroups)
+               return;
+
+       while (getline(&buf, &buflen, cgroups) != -1) {
+               __do_close int fd = -EBADF;
+               int ret;
+               char *controllers, *cgroup;
+
+               /* The controller list sits between the first and the second
+                * ':' of the line; the cgroup path follows the second one.
+                */
+               controllers = strchr(buf, ':');
+               if (!controllers)
+                       continue;
+               controllers++;
+
+               cgroup = strchr(controllers, ':');
+               if (!cgroup)
+                       continue;
+               *cgroup = '\0';
+
+               /* An empty controller field is a cgroup v2 entry. Skip it. */
+               if (cgroup == controllers)
+                       continue;
+
+               if (strcmp(controllers, "pids") != 0)
+                       continue;
+               cgroup++;
+
+               /* Presumably strips leading/trailing garbage (whitespace,
+                * newline) from the path; confirm against the helpers.
+                */
+               cgroup += lxc_char_left_gc(cgroup, strlen(cgroup));
+               cgroup[lxc_char_right_gc(cgroup, strlen(cgroup))] = '\0';
+
+               ret = snprintf(pids_max, sizeof(pids_max),
+                              "/sys/fs/cgroup/pids/%s/pids.max", cgroup);
+               if (ret < 0 || (size_t)ret >= sizeof(pids_max)) {
+                       fprintf(stderr, "Failed to create string\n");
+                       return;
+               }
+
+               fd = open(pids_max, O_WRONLY | O_CLOEXEC);
+               if (fd < 0) {
+                       fprintf(stderr, "Failed to open \"%s\"\n", pids_max);
+                       return;
+               }
+
+               if (write(fd, "1", 1) != 1)
+                       fprintf(stderr, "Failed to write to \"%s\"\n", pids_max);
+
+               return;
+       }
+}
+
+/* Recursively send SIGKILL to every process listed in
+ * /proc/<pid>/task/<pid>/children, descending into each child's own children
+ * before signalling the child itself.
+ *
+ * @pid process whose children are to be killed.
+ *
+ * Best effort: failures are reported on stderr and end the walk at this level.
+ * An empty children list is the normal case for a leaf process and is not
+ * reported as an error.
+ */
+static void kill_children(pid_t pid)
+{
+       __do_fclose FILE *f = NULL;
+       char path[PATH_MAX];
+       int ret;
+
+       ret = snprintf(path, sizeof(path), "/proc/%d/task/%d/children", pid, pid);
+       if (ret < 0 || (size_t)ret >= sizeof(path)) {
+               fprintf(stderr, "Failed to create string\n");
+               return;
+       }
+
+       f = fopen(path, "re");
+       if (!f) {
+               fprintf(stderr, "Failed to open %s\n", path);
+               return;
+       }
+
+       for (;;) {
+               pid_t find_pid;
+
+               ret = fscanf(f, "%d ", &find_pid);
+               if (ret == EOF) {
+                       /* feof() only turns true after a read attempt, so the
+                        * end of the list (or an empty list) must be detected
+                        * from fscanf() itself. Only a real read error is
+                        * worth a message.
+                        */
+                       if (ferror(f))
+                               fprintf(stderr, "Failed to retrieve pid\n");
+                       return;
+               }
+
+               if (ret != 1) {
+                       fprintf(stderr, "Failed to retrieve pid\n");
+                       return;
+               }
+
+               kill_children(find_pid);
+               (void)kill(find_pid, SIGKILL);
+       }
+}
+
+/* Resolve /proc/self/exe, lazily unmount that path and, only if the unmount
+ * succeeded, unlink it. Every failure is silently ignored.
+ *
+ * NOTE(review): this was written for an init binary bind mounted into the
+ * container; now that init runs as a library call, /proc/self/exe is the
+ * calling program. Confirm this is still wanted.
+ */
+static void remove_self(void)
+{
+       char exe[PATH_MAX] = {0};
+       ssize_t len;
+
+       len = readlink("/proc/self/exe", exe, sizeof(exe));
+       /* A result that fills the buffer leaves no room for the terminator
+        * (and may have been truncated), so treat it like a failure.
+        */
+       if (len < 0 || len >= PATH_MAX)
+               return;
+       exe[len] = '\0';
+
+       if (umount2(exe, MNT_DETACH) < 0)
+               return;
+
+       (void)unlink(exe);
+}
+
+/* Number of the first signal caught since the flag was last cleared, or 0.
+ * It is written from the signal handler and read/cleared by the code that
+ * installed the handler, hence volatile sig_atomic_t.
+ */
+static volatile sig_atomic_t was_interrupted;
+
+/* Signal handler: remember @sig unless an earlier signal is still pending in
+ * was_interrupted. Later signals are dropped until the flag is reset to 0.
+ */
+static void interrupt_handler(int sig)
+{
+       if (!was_interrupted)
+               was_interrupted = sig;
+}
+
+/* Minimal init run as a library call: fork, exec @argv in the child, then stay
+ * behind as the parent to react to signals and reap children until none are
+ * left.
+ *
+ * @argc  not used by this function.
+ * @argv  command line handed to execvp() in the child; argv[0] is the program.
+ * @quiet when true, the diagnostics guarded by it are not written to stderr.
+ *
+ * Never returns. Exits with the value lxc_error_set_and_log() computes for the
+ * forked child once it has been reaped (1 if it never was), or with
+ * EXIT_FAILURE when setup or wait() fails.
+ */
+__noreturn int lxc_container_init(int argc, char *const *argv, bool quiet)
+{
+       int i, logfd, ret;
+       pid_t pid;
+       struct sigaction act;
+       sigset_t mask, omask;
+       int have_status = 0, exit_with = 1, shutdown = 0;
+
+       /* Mask all the signals so we are safe to install a signal handler and
+        * to fork.
+        */
+       ret = sigfillset(&mask);
+       if (ret < 0)
+               exit(EXIT_FAILURE);
+
+       ret = sigdelset(&mask, SIGILL);
+       if (ret < 0)
+               exit(EXIT_FAILURE);
+
+       ret = sigdelset(&mask, SIGSEGV);
+       if (ret < 0)
+               exit(EXIT_FAILURE);
+
+       ret = sigdelset(&mask, SIGBUS);
+       if (ret < 0)
+               exit(EXIT_FAILURE);
+
+       /* pthread_sigmask() returns 0 or a positive error number, never -1. */
+       ret = pthread_sigmask(SIG_SETMASK, &mask, &omask);
+       if (ret != 0)
+               exit(EXIT_FAILURE);
+
+       ret = sigfillset(&act.sa_mask);
+       if (ret < 0)
+               exit(EXIT_FAILURE);
+
+       ret = sigdelset(&act.sa_mask, SIGILL);
+       if (ret < 0)
+               exit(EXIT_FAILURE);
+
+       ret = sigdelset(&act.sa_mask, SIGSEGV);
+       if (ret < 0)
+               exit(EXIT_FAILURE);
+
+       ret = sigdelset(&act.sa_mask, SIGBUS);
+       if (ret < 0)
+               exit(EXIT_FAILURE);
+
+       ret = sigdelset(&act.sa_mask, SIGSTOP);
+       if (ret < 0)
+               exit(EXIT_FAILURE);
+
+       ret = sigdelset(&act.sa_mask, SIGKILL);
+       if (ret < 0)
+               exit(EXIT_FAILURE);
+
+       act.sa_flags = 0;
+       act.sa_handler = interrupt_handler;
+
+       for (i = 1; i < NSIG; i++) {
+               /* Exclude some signals: ILL, SEGV and BUS are likely to reveal
+                * a bug and we want a core. STOP and KILL cannot be handled
+                * anyway: they're here for documentation. 32 and 33 are not
+                * defined.
+                */
+               if (i == SIGILL || i == SIGSEGV || i == SIGBUS ||
+                   i == SIGSTOP || i == SIGKILL || i == 32 || i == 33)
+                       continue;
+
+               ret = sigaction(i, &act, NULL);
+               if (ret < 0) {
+                       if (errno == EINVAL)
+                               continue;
+
+                       if (!quiet)
+                               fprintf(stderr, "Failed to change signal action\n");
+                       exit(EXIT_FAILURE);
+               }
+       }
+
+       remove_self();
+
+       pid = fork();
+       if (pid < 0)
+               exit(EXIT_FAILURE);
+
+       if (!pid) {
+               /* restore default signal handlers */
+               for (i = 1; i < NSIG; i++) {
+                       sighandler_t sigerr;
+
+                       if (i == SIGILL || i == SIGSEGV || i == SIGBUS ||
+                           i == SIGSTOP || i == SIGKILL || i == 32 || i == 33)
+                               continue;
+
+                       sigerr = signal(i, SIG_DFL);
+                       if (sigerr == SIG_ERR && !quiet)
+                               fprintf(stderr, "Failed to reset to default action for signal \"%d\": %d\n", i, pid);
+               }
+
+               /* Give the payload the signal mask we were started with. */
+               ret = pthread_sigmask(SIG_SETMASK, &omask, NULL);
+               if (ret != 0) {
+                       if (!quiet)
+                               fprintf(stderr, "Failed to set signal mask\n");
+                       exit(EXIT_FAILURE);
+               }
+
+               (void)setsid();
+
+               (void)ioctl(STDIN_FILENO, TIOCSCTTY, 0);
+
+               /* execvp() only returns on failure. */
+               ret = execvp(argv[0], argv);
+               if (!quiet)
+                       fprintf(stderr, "Failed to exec \"%s\"\n", argv[0]);
+               exit(ret);
+       }
+       logfd = open("/dev/console", O_WRONLY | O_NOCTTY | O_CLOEXEC);
+       if (logfd >= 0) {
+               ret = dup3(logfd, STDERR_FILENO, O_CLOEXEC);
+               if (ret < 0)
+                       exit(EXIT_FAILURE);
+       }
+
+       (void)setproctitle("init");
+
+       /* Let's process the signals now. */
+       ret = sigdelset(&omask, SIGALRM);
+       if (ret < 0)
+               exit(EXIT_FAILURE);
+
+       ret = pthread_sigmask(SIG_SETMASK, &omask, NULL);
+       if (ret != 0) {
+               if (!quiet)
+                       fprintf(stderr, "Failed to set signal mask\n");
+               exit(EXIT_FAILURE);
+       }
+
+       /* No need of other inherited fds but stderr. */
+       close(STDIN_FILENO);
+       close(STDOUT_FILENO);
+
+       for (;;) {
+               int status;
+               pid_t waited_pid;
+
+               switch (was_interrupted) {
+               case 0:
+               /* Some applications send SIGHUP in order to get init to reload
+                * its configuration. We don't want to forward this onto the
+                * application itself, because it probably isn't expecting this
+                * signal since it was expecting init to do something with it.
+                *
+                * Instead, let's explicitly ignore it here. The actual
+                * terminal case is handled in the monitor's handler, which
+                * sends this task a SIGTERM in the case of a SIGHUP, which is
+                * what we want.
+                */
+               case SIGHUP:
+                       break;
+               case SIGPWR:
+               case SIGTERM:
+                       /* First shutdown request: ask everything to terminate
+                        * and arm a one second timer (handled as SIGALRM below).
+                        */
+                       if (!shutdown) {
+                               pid_t mypid = lxc_raw_getpid();
+
+                               shutdown = 1;
+                               prevent_forking();
+                               if (mypid != 1) {
+                                       kill_children(mypid);
+                               } else {
+                                       ret = kill(-1, SIGTERM);
+                                       if (ret < 0 && !quiet)
+                                               fprintf(stderr, "Failed to send SIGTERM to all children\n");
+                               }
+                               alarm(1);
+                       }
+                       break;
+               case SIGALRM: {
+                       /* The timer fired: kill whatever is left. */
+                       pid_t mypid = lxc_raw_getpid();
+
+                       prevent_forking();
+                       if (mypid != 1) {
+                               kill_children(mypid);
+                       } else {
+                               ret = kill(-1, SIGKILL);
+                               if (ret < 0 && !quiet)
+                                       fprintf(stderr, "Failed to send SIGKILL to all children\n");
+                       }
+                       break;
+               }
+               default:
+                       /* Any other signal is forwarded to the payload. */
+                       kill(pid, was_interrupted);
+                       break;
+               }
+               ret = EXIT_SUCCESS;
+
+               was_interrupted = 0;
+               waited_pid = wait(&status);
+               if (waited_pid < 0) {
+                       /* No children left: we are done. */
+                       if (errno == ECHILD)
+                               goto out;
+
+                       /* A signal arrived: go handle it. */
+                       if (errno == EINTR)
+                               continue;
+
+                       if (!quiet)
+                               fprintf(stderr, "Failed to wait on child %d\n", pid);
+                       ret = -1;
+                       goto out;
+               }
+
+               /* Reset timer each time a process exited. */
+               if (shutdown)
+                       alarm(1);
+
+               /* Keep the exit code of the started application (not wrapped
+                * pid) and continue to wait for the end of the orphan group.
+                */
+               if (waited_pid == pid && !have_status) {
+                       exit_with = lxc_error_set_and_log(waited_pid, status);
+                       have_status = 1;
+               }
+       }
+out:
+       if (ret < 0)
+               exit(EXIT_FAILURE);
+       exit(exit_with);
+}
index d7b6fc4464d1bf2887f06df61709910093d824fc..e99de13af6beda186d2ddb41bc6db7d23f698c45 100644 (file)
@@ -54,4 +54,6 @@ __hidden extern const char *lxc_global_config_value(const char *option_name);
 
 __hidden extern int setproctitle(char *title);
 
+__hidden __noreturn int lxc_container_init(int argc, char *const *argv, bool quiet);
+
 #endif /* __LXC_INITUTILS_H */
index fa1593b984a4c2bae81a2be0fc9a4d5f462bd5bc..e3aa0e95e38b8e64a05039cb93a760e0eb93aa5a 100644 (file)
@@ -136,8 +136,6 @@ struct lxc_handler {
 };
 
 struct execute_args {
-       char *init_path;
-       int init_fd;
        char *const *argv;
        int quiet;
 };