]> git.ipfire.org Git - thirdparty/lxc.git/commitdiff
Add support for checkpoint and restore via CRIU
authorTycho Andersen <tycho.andersen@canonical.com>
Tue, 26 Aug 2014 14:09:36 +0000 (09:09 -0500)
committerStéphane Graber <stgraber@ubuntu.com>
Tue, 26 Aug 2014 14:40:05 +0000 (10:40 -0400)
This patch adds support for checkpointing and restoring containers via CRIU.
It adds two api calls, ->checkpoint and ->restore, which are wrappers around
the CRIU CLI. CRIU has an RPC API, but reasons for preferring exec() are
discussed in [1].

To checkpoint, users specify a directory into which the container metadata
(CRIU dump files, plus some additional information about veth pairs and which
bridges they are attached to) is dumped. On restore, this
information is read out of the directory, a CRIU command line is constructed,
and CRIU is exec()d. CRIU uses the lxc-restore-net callback (which in turn
inspects the image directory with the NIC data) to properly restore the
network.

This will only work with the current git master of CRIU; anything as of
a152c843 should work. There is a known bug where containers which have been
restored cannot be checkpointed [2].

[1]: http://lists.openvz.org/pipermail/criu/2014-July/015117.html
[2]: http://lists.openvz.org/pipermail/criu/2014-August/015876.html

v2: fixed some problems with the s/int/bool return code form api function
v3: added a testcase, fixed up the man page synopsis
v4: fix a small typo in lxc-test-checkpoint-restore
v5: remove a reference to the old CRIU_PATH, and a bad error about the same

Signed-off-by: Tycho Andersen <tycho.andersen@canonical.com>
Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Acked-by: Stéphane Graber <stgraber@ubuntu.com>
15 files changed:
.gitignore
configure.ac
doc/Makefile.am
doc/lxc-checkpoint.sgml.in [new file with mode: 0644]
src/lxc/Makefile.am
src/lxc/lxc-restore-net [new file with mode: 0755]
src/lxc/lxc_checkpoint.c [new file with mode: 0644]
src/lxc/lxccontainer.c
src/lxc/lxccontainer.h
src/lxc/start.c
src/lxc/start.h
src/lxc/utils.c
src/lxc/utils.h
src/tests/Makefile.am
src/tests/lxc-test-checkpoint-restore [new file with mode: 0755]

index e6de18f609c7e0d5b71dd4c5816590f9a87f1df2..0b6ec69bc358580f044365c1cdb289c151f1f917 100644 (file)
@@ -49,6 +49,7 @@ src/lxc/lxc-attach
 src/lxc/lxc-autostart
 src/lxc/lxc-cgroup
 src/lxc/lxc-checkconfig
+src/lxc/lxc-checkpoint
 src/lxc/lxc-clone
 src/lxc/lxc-console
 src/lxc/lxc-config
index 1a55521cec426c4c7f3dc09346ba8b3ed1338780..882e759a19c6e0d530cc9eb010f76aad87c262ee 100644 (file)
@@ -644,6 +644,7 @@ AC_CONFIG_FILES([
        doc/lxc-autostart.sgml
        doc/lxc-cgroup.sgml
        doc/lxc-checkconfig.sgml
+       doc/lxc-checkpoint.sgml
        doc/lxc-clone.sgml
        doc/lxc-config.sgml
        doc/lxc-console.sgml
index bfe887ed6fb7eca98228f66108e469565f964faf..767ee3861ed8110452f2cca60fc7440812c46def 100644 (file)
@@ -20,6 +20,7 @@ man_MANS = \
        lxc-autostart.1 \
        lxc-cgroup.1 \
        lxc-checkconfig.1 \
+       lxc-checkpoint.1 \
        lxc-clone.1 \
        lxc-config.1 \
        lxc-console.1 \
diff --git a/doc/lxc-checkpoint.sgml.in b/doc/lxc-checkpoint.sgml.in
new file mode 100644 (file)
index 0000000..cb58074
--- /dev/null
@@ -0,0 +1,194 @@
+<!--
+
+(C) Copyright Canonical Ltd. 2014
+
+Authors:
+Tycho Andersen <tycho.andersen@canonical.com>
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+-->
+
+<!DOCTYPE refentry PUBLIC @docdtd@ [
+
+<!ENTITY commonoptions SYSTEM "@builddir@/common_options.sgml">
+<!ENTITY seealso SYSTEM "@builddir@/see_also.sgml">
+]>
+
+<refentry>
+
+  <docinfo><date>@LXC_GENERATE_DATE@</date></docinfo>
+
+  <refmeta>
+    <refentrytitle>lxc-checkpoint</refentrytitle>
+    <manvolnum>1</manvolnum>
+  </refmeta>
+
+  <refnamediv>
+    <refname>lxc-checkpoint</refname>
+
+    <refpurpose>
+      checkpoint a container
+    </refpurpose>
+  </refnamediv>
+
+  <refsynopsisdiv>
+    <cmdsynopsis>
+      <command>lxc-checkpoint</command>
+      <arg choice="req">-n <replaceable>name</replaceable></arg>
+      <arg choice="req">-D <replaceable>PATH</replaceable></arg>
+      <arg choice="opt">-r</arg>
+      <arg choice="opt">-s</arg>
+      <arg choice="opt">-v</arg>
+      <arg choice="opt">-d</arg>
+      <arg choice="opt">-F</arg>
+    </cmdsynopsis>
+  </refsynopsisdiv>
+
+  <refsect1>
+    <title>Description</title>
+    <para>
+      <command>lxc-checkpoint</command> checkpoints and restores containers.
+    </para>
+  </refsect1>
+
+  <refsect1>
+    <title>Options</title>
+    <variablelist>
+
+      <varlistentry>
+        <term>
+          <option>-r, --restore</option>
+        </term>
+        <listitem>
+          <para>
+            Restore the checkpoint for the container, instead of dumping it.
+            This option is incompatible with <option>-s</option>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <option>-D <replaceable>PATH</replaceable>, --checkpoint-dir=<replaceable>PATH</replaceable></option>
+        </term>
+        <listitem>
+          <para>
+            The directory to dump the checkpoint metadata.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <option>-s, --stop</option>
+        </term>
+        <listitem>
+          <para>
+            Optionally stop the container after dumping. This option is
+            incompatible with <option>-r</option>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <option>-v, --verbose</option>
+        </term>
+        <listitem>
+          <para>
+            Enable verbose criu logging.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <option>-d, --daemon</option>
+        </term>
+        <listitem>
+          <para>
+            Restore the container in the background (this is the default).
+            Only available when providing <option>-r</option>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <option>-F, --foreground</option>
+        </term>
+        <listitem>
+          <para>
+            Restore the container in the foreground. Only available when
+            providing <option>-r</option>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+  </refsect1>
+
+  &commonoptions;
+
+  <refsect1>
+    <title>Examples</title>
+    <variablelist>
+
+      <varlistentry>
+        <term>lxc-checkpoint -n foo -D /tmp/checkpoint</term>
+        <listitem>
+          <para>
+            Checkpoint the container foo into the directory /tmp/checkpoint.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>lxc-checkpoint -r -n foo -D /tmp/checkpoint</term>
+        <listitem>
+          <para>
+            Restore the checkpoint from the directory /tmp/checkpoint.
+          </para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+  </refsect1>
+
+  &seealso;
+
+  <refsect1>
+    <title>Author</title>
+    <para>Tycho Andersen <email>tycho.andersen@canonical.com</email></para>
+  </refsect1>
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:t
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:2
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:nil
+sgml-exposed-tags:nil
+sgml-local-catalogs:nil
+sgml-local-ecat-files:nil
+End:
+-->
index c1a67d64da1bf8b1e7dcb8508c20b6237a3ec37f..26bb005f6831231506422827a2d019ac1c3d58fb 100644 (file)
@@ -184,6 +184,7 @@ bin_PROGRAMS = \
        lxc-attach \
        lxc-autostart \
        lxc-cgroup \
+       lxc-checkpoint \
        lxc-clone \
        lxc-config \
        lxc-console \
@@ -205,6 +206,8 @@ sbin_PROGRAMS = init.lxc
 pkglibexec_PROGRAMS = \
        lxc-monitord \
        lxc-user-nic
+pkglibexec_SCRIPTS = \
+       lxc-restore-net
 
 AM_LDFLAGS = -Wl,-E
 if ENABLE_RPATH
@@ -234,6 +237,7 @@ lxc_create_SOURCES = lxc_create.c
 lxc_snapshot_SOURCES = lxc_snapshot.c
 lxc_usernsexec_SOURCES = lxc_usernsexec.c
 lxc_user_nic_SOURCES = lxc_user_nic.c network.c network.h
+lxc_checkpoint_SOURCES = lxc_checkpoint.c
 
 if HAVE_STATIC_LIBCAP
 sbin_PROGRAMS += init.lxc.static
diff --git a/src/lxc/lxc-restore-net b/src/lxc/lxc-restore-net
new file mode 100755 (executable)
index 0000000..7d45583
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+[ -z "$CRTOOLS_IMAGE_DIR" ] && exit 1
+
+set -e
+
+dir="$CRTOOLS_IMAGE_DIR"
+
+i=0
+while [ -f "$dir/eth$i" ] && [ -f "$dir/veth$i" ] && [ -f "$dir/bridge$i" ]; do
+       veth=$(cat "$dir/veth$i")
+       bridge=$(cat "$dir/bridge$i")
+
+       if [ "$CRTOOLS_SCRIPT_ACTION" = "network-lock" ]; then
+               brctl delif $bridge $veth
+       fi
+
+       if [ "$CRTOOLS_SCRIPT_ACTION" = "network-unlock" ]; then
+               brctl addif $bridge $veth
+               ifconfig $veth 0.0.0.0 up
+       fi
+
+       i=$((i+1))
+done
diff --git a/src/lxc/lxc_checkpoint.c b/src/lxc/lxc_checkpoint.c
new file mode 100644 (file)
index 0000000..8dc2c17
--- /dev/null
@@ -0,0 +1,202 @@
+/*
+ *
+ * Copyright © 2014 Tycho Andersen <tycho.andersen@canonical.com>.
+ * Copyright © 2014 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <lxc/lxccontainer.h>
+
+#include "log.h"
+#include "config.h"
+#include "lxc.h"
+#include "arguments.h"
+
+/* Option state recorded by my_parser() while the command line is parsed. */
+static char *checkpoint_dir = NULL;
+static bool stop = false;
+static bool verbose = false;
+static bool do_restore = false;
+/* Set when -d or -F was given explicitly; my_checker() uses it to reject
+ * those flags when -r is absent. */
+static bool daemonize_set = false;
+
+/* Long options specific to lxc-checkpoint, followed by LXC_COMMON_OPTIONS.
+ * The short-option letter in the last column is what my_parser() switches
+ * on. */
+static const struct option my_longopts[] = {
+       {"checkpoint-dir", required_argument, 0, 'D'},
+       {"stop", no_argument, 0, 's'},
+       {"verbose", no_argument, 0, 'v'},
+       {"restore", no_argument, 0, 'r'},
+       {"daemon", no_argument, 0, 'd'},
+       {"foreground", no_argument, 0, 'F'},
+       LXC_COMMON_OPTIONS
+};
+
+/*
+ * Validate the combination of options once parsing is complete.
+ *
+ * Returns 0 when the options are consistent, or -1 after reporting the
+ * problem through lxc_error().
+ */
+static int my_checker(const struct lxc_arguments *args)
+{
+       if (do_restore && stop) {
+               lxc_error(args, "-s not compatible with -r.");
+               return -1;
+
+       } else if (!do_restore && daemonize_set) {
+               /* -d/-F were given without -r; they only apply to restore. */
+               lxc_error(args, "-d/-F only valid together with -r.");
+               return -1;
+       }
+
+       if (checkpoint_dir == NULL) {
+               lxc_error(args, "-D is required.");
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Per-option callback: record the option identified by @c in the file-level
+ * state (and in @args for -d/-F).
+ *
+ * @arg is the option argument and is only used for -D.
+ *
+ * Returns 0 on success, -1 if the checkpoint directory cannot be duplicated.
+ */
+static int my_parser(struct lxc_arguments *args, int c, char *arg)
+{
+       switch (c) {
+       case 'D':
+               /* A repeated -D replaces the earlier value instead of
+                * leaking it. */
+               free(checkpoint_dir);
+               checkpoint_dir = strdup(arg);
+               if (!checkpoint_dir)
+                       return -1;
+               break;
+       case 's':
+               stop = true;
+               break;
+       case 'v':
+               verbose = true;
+               break;
+       case 'r':
+               do_restore = true;
+               break;
+       case 'd':
+               args->daemonize = 1;
+               daemonize_set = true;
+               break;
+       case 'F':
+               args->daemonize = 0;
+               daemonize_set = true;
+               break;
+       default:
+               /* Not one of ours; nothing to record. */
+               break;
+       }
+       return 0;
+}
+
+/* Argument descriptor handed to lxc_arguments_parse() in main(): program
+ * name, help text, option table and the parser/checker callbacks defined
+ * above. */
+static struct lxc_arguments my_args = {
+       .progname  = "lxc-checkpoint",
+       .help      = "\
+--name=NAME\n\
+\n\
+lxc-checkpoint checkpoints and restores a container\n\
+  Serializes a container's running state to disk to allow restoring it in\n\
+  its running state at a later time.\n\
+\n\
+Options :\n\
+  -n, --name=NAME           NAME for name of the container\n\
+  -r, --restore             Restore container\n\
+  -D, --checkpoint-dir=DIR  directory to save the checkpoint in\n\
+  -v, --verbose             Enable verbose criu logs\n\
+  Checkpoint options:\n\
+  -s, --stop                Stop the container after checkpointing.\n\
+  Restore options:\n\
+  -d, --daemon              Daemonize the container (default)\n\
+  -F, --foreground          Start with the current tty attached to /dev/console\n\
+",
+       .options   = my_longopts,
+       .parser    = my_parser,
+       /* Default; my_parser() overwrites it when -d or -F is given. */
+       .daemonize = 1,
+       .checker   = my_checker,
+};
+
+/*
+ * Checkpoint the running container @c into checkpoint_dir.
+ *
+ * The reference on @c is dropped on every path. Returns true when the
+ * container's checkpoint method succeeded, false when the container is not
+ * running or the checkpoint failed (a message is printed to stderr).
+ */
+bool checkpoint(struct lxc_container *c)
+{
+       bool ok;
+
+       if (c->is_running(c)) {
+               ok = c->checkpoint(c, checkpoint_dir, stop, verbose);
+               lxc_container_put(c);
+
+               if (!ok)
+                       fprintf(stderr, "Checkpointing %s failed.\n", my_args.name);
+
+               return ok;
+       }
+
+       fprintf(stderr, "%s not running, not checkpointing.\n", my_args.name);
+       lxc_container_put(c);
+       return false;
+}
+
+/*
+ * Restore container @c from checkpoint_dir.
+ *
+ * When my_args.daemonize is set the restore runs in a forked child and the
+ * parent returns true as soon as the fork succeeded; otherwise the restore
+ * runs in the calling process and its result is returned.
+ *
+ * The reference on @c is dropped on every path. Returns false if the
+ * container is already running, if fork() fails, or if the restore itself
+ * fails in the process that performed it.
+ */
+bool restore(struct lxc_container *c)
+{
+       pid_t pid = 0;
+       bool ret = true;
+
+       if (c->is_running(c)) {
+               fprintf(stderr, "%s is running, not restoring.\n", my_args.name);
+               lxc_container_put(c);
+               return false;
+       }
+
+       if (my_args.daemonize) {
+               pid = fork();
+               if (pid < 0) {
+                       /* Without a child nobody would perform the restore;
+                        * report the failure instead of claiming success. */
+                       perror("fork");
+                       lxc_container_put(c);
+                       return false;
+               }
+       }
+
+       /* pid == 0: either the forked child or the foreground (-F) case. */
+       if (pid == 0) {
+               ret = c->restore(c, checkpoint_dir, verbose);
+
+               if (!ret) {
+                       fprintf(stderr, "Restoring %s failed.\n", my_args.name);
+               }
+       }
+
+       lxc_container_put(c);
+
+       return ret;
+}
+
+/*
+ * Entry point: parse the command line, look up the container and either
+ * checkpoint or restore it depending on -r.
+ *
+ * Exit status is 0 on success and 1 on any failure.
+ */
+int main(int argc, char *argv[])
+{
+       struct lxc_container *container;
+       bool ok;
+
+       if (lxc_arguments_parse(&my_args, argc, argv) != 0)
+               exit(1);
+
+       container = lxc_container_new(my_args.name, my_args.lxcpath[0]);
+       if (container == NULL) {
+               fprintf(stderr, "System error loading %s\n", my_args.name);
+               exit(1);
+       }
+
+       if (!container->may_control(container)) {
+               fprintf(stderr, "Insufficent privileges to control %s\n", my_args.name);
+               goto fail;
+       }
+
+       if (!container->is_defined(container)) {
+               fprintf(stderr, "%s is not defined\n", my_args.name);
+               goto fail;
+       }
+
+       /* Both helpers drop the container reference themselves. */
+       ok = do_restore ? restore(container) : checkpoint(container);
+
+       return ok ? 0 : 1;
+
+fail:
+       lxc_container_put(container);
+       exit(1);
+}
index 172e667e2f5963f5403680bbbdb159e185ae2376..ed6f8de977e84c075b770f7438bf793c4961cabe 100644 (file)
@@ -55,6 +55,7 @@
 #include "monitor.h"
 #include "namespace.h"
 #include "lxclock.h"
+#include "sync.h"
 
 #if HAVE_IFADDRS_H
 #include <ifaddrs.h>
@@ -3495,6 +3496,469 @@ static bool lxcapi_remove_device_node(struct lxc_container *c, const char *src_p
        return add_remove_device_node(c, src_path, dest_path, false);
 }
 
+/*
+ * Parameters for one criu invocation, consumed by exec_criu().
+ *
+ * lxcapi_checkpoint() fills in action/directory/c/stop/verbose and
+ * lxcapi_restore() fills in action/directory/c/pidfile/verbose; the field
+ * that belongs to the other action is left unset by each caller.
+ */
+struct criu_opts {
+       /* The type of criu invocation, one of "dump" or "restore" */
+       char *action;
+
+       /* The directory to pass to criu */
+       char *directory;
+
+       /* The container to dump */
+       struct lxc_container *c;
+
+       /* Enable criu verbose mode? */
+       bool verbose;
+
+       /* dump: stop the container or not after dumping? */
+       bool stop;
+
+       /* restore: the file to write the init process' pid into */
+       char *pidfile;
+};
+
+/*
+ * Read the first whitespace-delimited token of the file
+ * "<directory>/<file><netnr>" into @out.
+ *
+ * @out must be 128 bytes long (at most 127 characters plus the terminating
+ * NUL are stored).
+ *
+ * Returns 0 on success, -1 if the path does not fit in PATH_MAX, the file
+ * cannot be opened, or no token could be read.
+ */
+static int read_criu_file(const char *directory, const char *file, int netnr, char *out)
+{
+       char fname[PATH_MAX];
+       FILE *fp;
+       int len, nread;
+
+       len = snprintf(fname, PATH_MAX, "%s/%s%d", directory, file, netnr);
+       if (len < 0 || len >= PATH_MAX) {
+               ERROR("%s: path too long", __func__);
+               return -1;
+       }
+
+       fp = fopen(fname, "r");
+       if (fp == NULL)
+               return -1;
+
+       nread = fscanf(fp, "%127s", out);
+       fclose(fp);
+
+       return nread > 0 ? 0 : -1;
+}
+
+static void exec_criu(struct criu_opts *opts)
+{
+       char **argv, log[PATH_MAX];
+       int static_args = 13, argc = 0, i, ret;
+
+       /* The command line always looks like:
+        * criu $(action) --tcp-established --file-locks --link-remap --manage-cgroups \
+        *     --action-script foo.sh -D $(directory) -o $(directory)/$(action).log
+        * +1 for final NULL */
+
+       if (strcmp(opts->action, "dump") == 0) {
+               /* -t pid */
+               static_args += 2;
+
+               /* --leave-running */
+               if (!opts->stop)
+                       static_args++;
+       } else if (strcmp(opts->action, "restore") == 0) {
+               /* --root $(lxc_mount_point) --restore-detached --pidfile $foo */
+               static_args += 5;
+       } else {
+               return;
+       }
+
+       if (opts->verbose)
+               static_args++;
+
+       ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->directory, opts->action);
+       if (ret < 0 || ret >= PATH_MAX) {
+               ERROR("logfile name too long\n");
+               return;
+       }
+
+       argv = malloc(static_args * sizeof(*argv));
+       if (!argv)
+               return;
+
+       memset(argv, 0, static_args * sizeof(*argv));
+
+#define DECLARE_ARG(arg)                       \
+       do {                                    \
+               argv[argc++] = strdup(arg);     \
+               if (!argv[argc-1])              \
+                       goto err;               \
+       } while (0)
+
+       argv[argc++] = on_path("criu", NULL);
+       if (!argv[argc-1]) {
+               ERROR("Couldn't find criu binary\n");
+               goto err;
+       }
+
+       DECLARE_ARG(opts->action);
+       DECLARE_ARG("--tcp-established");
+       DECLARE_ARG("--file-locks");
+       DECLARE_ARG("--link-remap");
+       DECLARE_ARG("--manage-cgroups");
+       DECLARE_ARG("--action-script");
+       DECLARE_ARG(LIBEXECDIR "/lxc/lxc-restore-net");
+       DECLARE_ARG("-D");
+       DECLARE_ARG(opts->directory);
+       DECLARE_ARG("-o");
+       DECLARE_ARG(log);
+
+       if (opts->verbose)
+               DECLARE_ARG("-vvvvvv");
+
+       if (strcmp(opts->action, "dump") == 0) {
+               char pid[32];
+
+               if (sprintf(pid, "%d", lxcapi_init_pid(opts->c)) < 0)
+                       goto err;
+
+               DECLARE_ARG("-t");
+               DECLARE_ARG(pid);
+               if (!opts->stop)
+                       DECLARE_ARG("--leave-running");
+       } else if (strcmp(opts->action, "restore") == 0) {
+               int netnr = 0;
+               struct lxc_list *it;
+
+               DECLARE_ARG("--root");
+               DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
+               DECLARE_ARG("--restore-detached");
+               DECLARE_ARG("--pidfile");
+               DECLARE_ARG(opts->pidfile);
+
+               lxc_list_for_each(it, &opts->c->lxc_conf->network) {
+                       char eth[128], veth[128], buf[257];
+                       void *m;
+
+                       if (read_criu_file(opts->directory, "veth", netnr, veth))
+                               goto err;
+                       if (read_criu_file(opts->directory, "eth", netnr, eth))
+                               goto err;
+                       ret = snprintf(buf, 257, "%s=%s", eth, veth);
+                       if (ret < 0 || ret >= 257)
+                               goto err;
+
+                       /* final NULL and --veth-pair eth0:vethASDF */
+                       m = realloc(argv, (argc + 1 + 2) * sizeof(*argv));
+                       if (!m)
+                               goto err;
+                       argv = m;
+
+                       DECLARE_ARG("--veth-pair");
+                       DECLARE_ARG(buf);
+                       argv[argc] = NULL;
+
+                       netnr++;
+               }
+       }
+
+#undef DECLARE_ARG
+
+       execv(argv[0], argv);
+err:
+       for (i = 0; argv[i]; i++)
+               free(argv[i]);
+       free(argv);
+}
+
+/*
+ * Verify that the caller and the configuration of @c meet the conditions
+ * under which we know how to drive CRIU:
+ *   - effective uid is 0,
+ *   - every network is of type VETH or NONE,
+ *   - lxc.console is unset or "none" and lxc.tty is 0,
+ *   - a "devices.deny = c 5:1 rwm" cgroup rule is present.
+ *
+ * Returns true when all checks pass; otherwise logs the first failing check
+ * and returns false.
+ */
+static bool criu_ok(struct lxc_container *c)
+{
+       struct lxc_list *it;
+       const char *console;
+
+       if (geteuid() != 0) {
+               ERROR("Must be root to checkpoint\n");
+               return false;
+       }
+
+       /* We only know how to restore containers with veth networks. */
+       lxc_list_for_each(it, &c->lxc_conf->network) {
+               struct lxc_netdev *nic = it->elem;
+               bool supported = nic->type == LXC_NET_VETH ||
+                                nic->type == LXC_NET_NONE;
+
+               if (!supported) {
+                       ERROR("Found network that is not VETH or NONE\n");
+                       return false;
+               }
+       }
+
+       // These requirements come from http://criu.org/LXC
+       console = c->lxc_conf->console.path;
+       if (console != NULL && strcmp(console, "none") != 0) {
+               ERROR("lxc.console must be none\n");
+               return false;
+       }
+
+       if (c->lxc_conf->tty != 0) {
+               ERROR("lxc.tty must be 0\n");
+               return false;
+       }
+
+       /* The first matching deny rule is enough. */
+       lxc_list_for_each(it, &c->lxc_conf->cgroup) {
+               struct lxc_cgroup *rule = it->elem;
+
+               if (!strcmp(rule->subsystem, "devices.deny") &&
+                               !strcmp(rule->value, "c 5:1 rwm"))
+                       return true;
+       }
+
+       ERROR("couldn't find devices.deny = c 5:1 rwm");
+       return false;
+}
+
+/*
+ * Checkpoint container @c into @directory using criu.
+ *
+ * For each configured network N, the host-side veth name, the bridge it is
+ * linked to and the in-container interface name are written to
+ * <directory>/vethN, <directory>/bridgeN and <directory>/ethN. criu is then
+ * run in a forked child via exec_criu() and waited for.
+ *
+ * @stop    stop the container after dumping (otherwise --leave-running).
+ * @verbose enable criu's verbose logging.
+ *
+ * Returns true only if criu_ok() accepts the container, the metadata files
+ * were written and criu exited with status 0.
+ */
+static bool lxcapi_checkpoint(struct lxc_container *c, char *directory, bool stop, bool verbose)
+{
+       int netnr, status;
+       struct lxc_list *it;
+       bool error = false;
+       pid_t pid;
+
+       if (!criu_ok(c))
+               return false;
+
+       if (mkdir(directory, 0700) < 0 && errno != EEXIST)
+               return false;
+
+       netnr = 0;
+       lxc_list_for_each(it, &c->lxc_conf->network) {
+               char *veth = NULL, *bridge = NULL, veth_path[PATH_MAX], eth[128];
+               struct lxc_netdev *n = it->elem;
+               int pret;
+
+               pret = snprintf(veth_path, PATH_MAX, "lxc.network.%d.veth.pair", netnr);
+               if (pret < 0 || pret >= PATH_MAX) {
+                       error = true;
+                       goto out;
+               }
+
+               veth = lxcapi_get_running_config_item(c, veth_path);
+               if (!veth) {
+                       /* criu_ok() checks that all interfaces are
+                        * LXC_NET{VETH,NONE}, and VETHs should have this
+                        * config */
+                       assert(n->type == LXC_NET_NONE);
+                       break;
+               }
+
+               pret = snprintf(veth_path, PATH_MAX, "lxc.network.%d.link", netnr);
+               if (pret < 0 || pret >= PATH_MAX) {
+                       error = true;
+                       goto out;
+               }
+
+               bridge = lxcapi_get_running_config_item(c, veth_path);
+               if (!bridge) {
+                       error = true;
+                       goto out;
+               }
+
+               pret = snprintf(veth_path, PATH_MAX, "%s/veth%d", directory, netnr);
+               if (pret < 0 || pret >= PATH_MAX || print_to_file(veth_path, veth) < 0) {
+                       error = true;
+                       goto out;
+               }
+
+               pret = snprintf(veth_path, PATH_MAX, "%s/bridge%d", directory, netnr);
+               if (pret < 0 || pret >= PATH_MAX || print_to_file(veth_path, bridge) < 0) {
+                       error = true;
+                       goto out;
+               }
+
+               if (n->name) {
+                       if (strlen(n->name) >= 128) {
+                               error = true;
+                               goto out;
+                       }
+                       /* Length checked above, so the copy is terminated. */
+                       strncpy(eth, n->name, 128);
+               } else
+                       sprintf(eth, "eth%d", netnr);
+
+               pret = snprintf(veth_path, PATH_MAX, "%s/eth%d", directory, netnr);
+               if (pret < 0 || pret >= PATH_MAX || print_to_file(veth_path, eth) < 0)
+                       error = true;
+
+out:
+               free(veth);
+               free(bridge);
+               if (error)
+                       return false;
+
+               /* Advance to the next interface; without this every entry
+                * would query lxc.network.0.* and overwrite the *0 files. */
+               netnr++;
+       }
+
+       pid = fork();
+       if (pid < 0)
+               return false;
+
+       if (pid == 0) {
+               struct criu_opts os;
+
+               os.action = "dump";
+               os.directory = directory;
+               os.c = c;
+               os.stop = stop;
+               os.verbose = verbose;
+
+               /* exec_criu() returning is an error */
+               exec_criu(&os);
+               exit(1);
+       } else {
+               pid_t w = waitpid(pid, &status, 0);
+               if (w == -1) {
+                       perror("waitpid");
+                       return false;
+               }
+
+               /* Success only if criu exited normally with status 0. */
+               if (WIFEXITED(status)) {
+                       return !WEXITSTATUS(status);
+               }
+
+               return false;
+       }
+}
+
+/*
+ * Restore container @c from the checkpoint stored in @directory.
+ *
+ * A child process is forked which unshares its mount namespace, makes the
+ * container rootfs available at rootfs->mount and runs criu through
+ * exec_criu(). The parent waits for criu, reads the restored init pid from
+ * the pidfile, records each network's veth pair name from @directory, marks
+ * the container RUNNING and then enters lxc_poll().
+ *
+ * @verbose enables criu's verbose logging.
+ *
+ * Returns true on success, false on any failure.
+ *
+ * NOTE(review): tmpnam() only generates a name, so another process can
+ * create the file first; the pidfile is also never removed. The handler
+ * obtained from lxc_init() is not released on the early-return paths.
+ * Both look worth revisiting.
+ */
+static bool lxcapi_restore(struct lxc_container *c, char *directory, bool verbose)
+{
+       pid_t pid;
+       struct lxc_list *it;
+       struct lxc_rootfs *rootfs;
+       char pidfile[L_tmpnam];
+
+       if (!criu_ok(c))
+               return false;
+
+       if (geteuid()) {
+               ERROR("Must be root to restore\n");
+               return false;
+       }
+
+       if (!tmpnam(pidfile))
+               return false;
+
+       struct lxc_handler *handler;
+
+       handler = lxc_init(c->name, c->lxc_conf, c->config_path);
+       if (!handler)
+               return false;
+
+       pid = fork();
+       if (pid < 0)
+               return false;
+
+       if (pid == 0) {
+               struct criu_opts os;
+
+               /* Child: every failure path must exit(). Returning would let
+                * the child continue executing the caller's code as a second
+                * copy of the parent. */
+               if (unshare(CLONE_NEWNS))
+                       exit(1);
+
+               /* CRIU needs the lxc root bind mounted so that it is the root of some
+                * mount. */
+               rootfs = &c->lxc_conf->rootfs;
+
+               if (rootfs_is_blockdev(c->lxc_conf)) {
+                       if (do_rootfs_setup(c->lxc_conf, c->name, c->config_path) < 0)
+                               exit(1);
+               }
+               else {
+                       if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
+                               exit(1);
+
+                       if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
+                               rmdir(rootfs->mount);
+                               exit(1);
+                       }
+               }
+
+               os.action = "restore";
+               os.directory = directory;
+               os.c = c;
+               os.pidfile = pidfile;
+               os.verbose = verbose;
+
+               /* exec_criu() returning is an error */
+               exec_criu(&os);
+               umount(rootfs->mount);
+               rmdir(rootfs->mount);
+               exit(1);
+       } else {
+               int status, netnr = 0, ret;
+               bool error = false;
+               FILE *f;
+               pid_t w = waitpid(pid, &status, 0);
+
+               if (w == -1) {
+                       perror("waitpid");
+                       return false;
+               }
+
+               /* criu was killed or otherwise did not exit normally: there
+                * is no restored init to poll, so fail instead of falling
+                * through with handler->pid unset. */
+               if (!WIFEXITED(status)) {
+                       ERROR("criu did not exit normally");
+                       return false;
+               }
+
+               if (WEXITSTATUS(status))
+                       return false;
+
+               f = fopen(pidfile, "r");
+               if (!f) {
+                       perror("reading pidfile");
+                       ERROR("couldn't read restore's init pidfile %s\n", pidfile);
+                       return false;
+               }
+
+               ret = fscanf(f, "%d", (int*) &handler->pid);
+               fclose(f);
+               if (ret != 1) {
+                       ERROR("reading restore pid failed");
+                       return false;
+               }
+
+               if (container_mem_lock(c))
+                       return false;
+
+               lxc_list_for_each(it, &c->lxc_conf->network) {
+                       char eth[128], veth[128];
+                       struct lxc_netdev *netdev = it->elem;
+
+                       if (read_criu_file(directory, "veth", netnr, veth)) {
+                               error = true;
+                               goto out_unlock;
+                       }
+                       /* eth is only read to check that the file is there. */
+                       if (read_criu_file(directory, "eth", netnr, eth)) {
+                               error = true;
+                               goto out_unlock;
+                       }
+                       netdev->priv.veth_attr.pair = strdup(veth);
+                       if (!netdev->priv.veth_attr.pair) {
+                               error = true;
+                               goto out_unlock;
+                       }
+                       netnr++;
+               }
+out_unlock:
+               container_mem_unlock(c);
+               if (error)
+                       return false;
+
+               if (lxc_set_state(c->name, handler, RUNNING))
+                       return false;
+
+               if (lxc_poll(c->name, handler)) {
+                       lxc_abort(c->name, handler);
+                       return false;
+               }
+       }
+
+       return true;
+}
+
 static int lxcapi_attach_run_waitl(struct lxc_container *c, lxc_attach_options_t *options, const char *program, const char *arg, ...)
 {
        va_list ap;
@@ -3627,6 +4091,8 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
        c->may_control = lxcapi_may_control;
        c->add_device_node = lxcapi_add_device_node;
        c->remove_device_node = lxcapi_remove_device_node;
+       c->checkpoint = lxcapi_checkpoint;
+       c->restore = lxcapi_restore;
 
        /* we'll allow the caller to update these later */
        if (lxc_log_init(NULL, "none", NULL, "lxc_container", 0, c->config_path)) {
index 5085c43ef6e9f9cf5e5e27fa204dec4a3adbc52a..6344f3d7120be812833403757966b38928f7bbdb 100644 (file)
@@ -760,6 +760,31 @@ struct lxc_container {
         * \return \c true on success, else \c false.
         */
        bool (*remove_device_node)(struct lxc_container *c, const char *src_path, const char *dest_path);
+
+       /*!
+        * \brief Checkpoint a container.
+        *
+        * \param c Container.
+        * \param directory The directory to dump the container to.
+        * \param stop Whether or not to stop the container after checkpointing.
+        * \param verbose Enable criu's verbose logs.
+        *
+        * \return \c true on success, else \c false.
+        *
+        */
+       bool (*checkpoint)(struct lxc_container *c, char *directory, bool stop, bool verbose);
+
+       /*!
+        * \brief Restore a container from a checkpoint.
+        *
+        * \param c Container.
+        * \param directory The directory to restore the container from.
+        * \param verbose Enable criu's verbose logs.
+        *
+        * \return \c true on success, else \c false.
+        *
+        */
+       bool (*restore)(struct lxc_container *c, char *directory, bool verbose);
 };
 
 /*!
index f282b93cfb41a20b26baf2df52b667545c2b4120..98849e10e721e9a5e2842f60ad7902dcc38f56c7 100644 (file)
@@ -300,14 +300,14 @@ static int signal_handler(int fd, uint32_t events, void *data,
        return 1;
 }
 
-static int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t state)
+int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t state)
 {
        handler->state = state;
        lxc_monitor_send_state(name, state, handler->lxcpath);
        return 0;
 }
 
-static int lxc_poll(const char *name, struct lxc_handler *handler)
+int lxc_poll(const char *name, struct lxc_handler *handler)
 {
        int sigfd = handler->sigfd;
        int pid = handler->pid;
@@ -485,7 +485,7 @@ static void lxc_fini(const char *name, struct lxc_handler *handler)
        free(handler);
 }
 
-static void lxc_abort(const char *name, struct lxc_handler *handler)
+void lxc_abort(const char *name, struct lxc_handler *handler)
 {
        int ret, status;
 
index ca7891cdd920f97010ad1683c449e4a9a02b2fef..8af0a0607fe75bf19a6e554a6efaf4570628fe46 100644 (file)
@@ -74,6 +74,10 @@ struct lxc_handler {
        void *cgroup_data;
 };
 
+
+extern int lxc_poll(const char *name, struct lxc_handler *handler);
+extern int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t state);
+extern void lxc_abort(const char *name, struct lxc_handler *handler);
 extern struct lxc_handler *lxc_init(const char *name, struct lxc_conf *, const char *);
 
 extern int lxc_check_inherited(struct lxc_conf *conf, int fd_to_ignore);
index a32829d1179e2f33d5c554005a3898c0a66fe550..ed34706bff96a8f8efe613685f2ac80c4ca0f7d6 100644 (file)
@@ -1446,3 +1446,17 @@ out1:
        free(retv);
        return NULL;
 }
+
+/* Create/truncate file and write content to it; 0 on success, -1 on error. */
+int print_to_file(const char *file, const char *content)
+{
+       FILE *f = fopen(file, "w");
+       int ret;
+
+       if (!f)
+               return -1;
+       ret = fputs(content, f) == EOF ? -1 : 0;
+       if (fclose(f) != 0) /* buffered data is flushed here; may fail late */
+               ret = -1;
+       return ret;
+}
index a84b4897d447bcdc90c33cb1ff1d6347292529ed..cdfe56aefd868c366bff75291ac4afdb3741616b 100644 (file)
@@ -282,3 +282,4 @@ int detect_ramfs_rootfs(void);
 char *on_path(char *cmd, const char *rootfs);
 bool file_exists(const char *f);
 char *choose_init(const char *rootfs);
+int print_to_file(const char *file, const char *content);
index 6b4fceb74709494db445f18e6cb5147c9c518d0c..85f6cea999b71c8c1daed9769b7aebc86f4ca6e3 100644 (file)
@@ -51,7 +51,7 @@ bin_PROGRAMS = lxc-test-containertests lxc-test-locktests lxc-test-startone \
 bin_SCRIPTS = lxc-test-autostart
 
 if DISTRO_UBUNTU
-bin_SCRIPTS += lxc-test-usernic lxc-test-ubuntu lxc-test-unpriv
+bin_SCRIPTS += lxc-test-usernic lxc-test-ubuntu lxc-test-unpriv lxc-test-checkpoint
 endif
 
 endif
diff --git a/src/tests/lxc-test-checkpoint-restore b/src/tests/lxc-test-checkpoint-restore
new file mode 100755 (executable)
index 0000000..43068ef
--- /dev/null
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+# Do an end to end checkpoint and restore with criu.
+
+set -e
+
+FAIL() {
+       echo -n "Failed " >&2
+       echo "$*" >&2
+       exit 1
+}
+
+if [ "$(id -u)" != "0" ]; then
+       echo "ERROR: Must run as root."
+       exit 1
+fi
+
+if [ "$(criu --version | head -n1 | cut -d' ' -f 2)" != "1.3-rc2" ]; then
+       echo "SKIP: skipping test because no (or wrong) criu installed."
+       exit 0
+fi
+
+name=lxc-test-criu
+lxc-create -t ubuntu -n $name || FAIL "creating container"
+
+cat >> "$(lxc-config lxc.lxcpath)/$name/config" <<EOF
+# hax for criu
+lxc.console = none
+lxc.tty = 0
+lxc.cgroup.devices.deny = c 5:1 rwm
+EOF
+
+lxc-start -n $name -d || FAIL "starting container"
+lxc-wait -n $name -s RUNNING || FAIL "waiting for container to run"
+
+# Let the container boot and get into a steady state.
+sleep 5s
+
+# The first time this usually fails because CRIU cannot checkpoint things with
+# data on a socket.
+lxc-checkpoint -n $name -v -s -D /tmp/checkpoint || FAIL "failed checkpointing"
+lxc-wait -n $name -s STOPPED || FAIL "waiting for container to stop"
+lxc-checkpoint -n $name -v -r -D /tmp/checkpoint || FAIL "failed restoring"
+
+lxc-stop -n $name -t 1
+lxc-destroy -f -n $name