]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
nspawn: Add support for sysroot pivoting (#5258)
author Philip Withnall <philip@tecnocode.co.uk>
Wed, 8 Feb 2017 15:54:31 +0000 (15:54 +0000)
committer Lennart Poettering <lennart@poettering.net>
Wed, 8 Feb 2017 15:54:31 +0000 (16:54 +0100)
Add a new --pivot-root argument to systemd-nspawn, which specifies a
directory to pivot to / inside the container; while the original / is
pivoted to another specified directory (if provided). This adds
support for booting container images which may contain several bootable
sysroots, as is common with OSTree disk images. When these disk images
are booted on real hardware, ostree-prepare-root is run in conjunction
with sysroot.mount in the initramfs to achieve the same results.

man/systemd-nspawn.xml
man/systemd.nspawn.xml
src/nspawn/nspawn-gperf.gperf
src/nspawn/nspawn-mount.c
src/nspawn/nspawn-mount.h
src/nspawn/nspawn-settings.c
src/nspawn/nspawn-settings.h
src/nspawn/nspawn.c

index f6b3f57fc77a24b31eafdba291aa2d4561662267..5e671d21e89c31377805a8d5ce9cf2d2c7105a4d 100644 (file)
         an absolute path in the container's file system namespace.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><option>--pivot-root=</option></term>
+
+        <listitem><para>Pivot the specified directory to <filename>/</filename> inside the container, and either unmount the
+        container's old root, or pivot it to another specified directory. Takes one of: a path argument — in which case the
+        specified path will be pivoted to <filename>/</filename> and the old root will be unmounted; or a colon-separated pair
+        of new root path and pivot destination for the old root. The new root path will be pivoted to <filename>/</filename>,
+        and the old <filename>/</filename> will be pivoted to the other directory. Both paths must be absolute, and are resolved
+        in the container's file system namespace.</para>
+
+        <para>This is for containers which have several bootable directories in them; for example, several
+        <ulink url="https://ostree.readthedocs.io/en/latest/">OSTree</ulink> deployments. It emulates the behavior of the boot
+        loader and initial RAM disk, which normally select the directory to mount as root and to start the container's PID 1 in.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><option>-u</option></term>
         <term><option>--user=</option></term>
       <programlisting># chcon system_u:object_r:svirt_sandbox_file_t:s0:c0,c1 -R /srv/container
 # systemd-nspawn -L system_u:object_r:svirt_sandbox_file_t:s0:c0,c1 -Z system_u:system_r:svirt_lxc_net_t:s0:c0,c1 -D /srv/container /bin/sh</programlisting>
     </example>
+
+    <example>
+      <title>Run a container with an OSTree deployment</title>
+
+      <programlisting># systemd-nspawn -b -i ~/image.raw --pivot-root=/ostree/deploy/$OS/deploy/$CHECKSUM:/sysroot --bind=+/sysroot/ostree/deploy/$OS/var:/var</programlisting>
+    </example>
   </refsect1>
 
   <refsect1>
index 714318835646b549e1968bd552038bfb7f21c384..4f3f0529119fafc766696ab9afdb7307fe2765cc 100644 (file)
         switch.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>PivotRoot=</varname></term>
+
+        <listitem><para>Selects a directory to pivot to <filename>/</filename> inside the container when starting up.
+        Takes a single path, or a pair of two paths separated by a colon. Both paths must be absolute, and are resolved
+        in the container's file system namespace. This corresponds to the <option>--pivot-root=</option> command line
+        switch.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>Capability=</varname></term>
         <term><varname>DropCapability=</varname></term>
index c0fa4bfa1f39d7686106ce59e559b0902678f383..e5fdf63162f25951f39ac2d123d80e1429840f24 100644 (file)
@@ -26,6 +26,7 @@ Exec.KillSignal,              config_parse_signal,        0, offsetof(Settings,
 Exec.Personality,             config_parse_personality,   0, offsetof(Settings, personality)
 Exec.MachineID,               config_parse_id128,         0, offsetof(Settings, machine_id)
 Exec.WorkingDirectory,        config_parse_path,          0, offsetof(Settings, working_directory)
+Exec.PivotRoot,               config_parse_pivot_root,    0, 0
 Exec.PrivateUsers,            config_parse_private_users, 0, 0
 Exec.NotifyReady,             config_parse_bool,          0, offsetof(Settings, notify_ready)
 Files.ReadOnly,               config_parse_tristate,      0, offsetof(Settings, read_only)
index 72c007f204c657472dc153f3b247bb6f0a5cc025..4b2838b752c3f906c8bc9d1294379bdf331de1c1 100644 (file)
@@ -1349,3 +1349,116 @@ fail:
         (void) rmdir(template);
         return r;
 }
+
+/* Parses a pivot root specification of the form "NEW" or "NEW:OLD" from s.
+ *
+ * The text before the first ':' becomes the new root; whatever follows it (if non-empty) becomes the
+ * destination for the old root. Both must be absolute paths.
+ *
+ * *pivot_root_new and *pivot_root_old are expected to be initialised to allocated memory or NULL: on
+ * success their previous values are freed and replaced (the latter with NULL if no old root
+ * destination was given). On failure neither of them is modified.
+ *
+ * Returns 0 on success, -EINVAL if no word could be extracted or a path is not absolute, -ENOMEM if
+ * duplicating the old root path fails, or the negative error returned by extract_first_word(). */
+int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) {
+        _cleanup_free_ char *new_path = NULL, *old_path = NULL;
+        const char *rest = s;
+        int r;
+
+        assert(pivot_root_new);
+        assert(pivot_root_old);
+
+        /* Split off everything up to the first ':'; "rest" is advanced past the separator. */
+        r = extract_first_word(&rest, &new_path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return -EINVAL;
+
+        /* Anything left over is the destination for the old root; otherwise old_path stays NULL. */
+        if (!isempty(rest)) {
+                old_path = strdup(rest);
+                if (!old_path)
+                        return -ENOMEM;
+        }
+
+        if (!path_is_absolute(new_path) ||
+            (old_path && !path_is_absolute(old_path)))
+                return -EINVAL;
+
+        /* Only now, with both values validated, hand them over to the caller. */
+        free_and_replace(*pivot_root_new, new_path);
+        free_and_replace(*pivot_root_old, old_path);
+
+        return 0;
+}
+
+/* Moves the tree at pivot_root_new (a path below "directory") onto "directory" itself, so that it
+ * becomes the container's root, and optionally keeps the previous root reachable below the new one.
+ *
+ * directory:      where the container's root is currently mounted; must not be NULL.
+ * pivot_root_new: path inside the container that shall become the new /; if NULL nothing is done.
+ * pivot_root_old: path inside the new root that shall receive the old /; if NULL the old root is
+ *                 simply covered by the new one.
+ *
+ * Returns 0 on success or if there is nothing to do, and a negative value as returned by the failing
+ * helper (mount_verbose(), log_error_errno() or log_oom()) otherwise. */
+int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old) {
+        _cleanup_free_ char *directory_pivot_root_new = NULL;
+        _cleanup_free_ char *pivot_tmp_pivot_root_old = NULL;
+        char pivot_tmp[] = "/tmp/nspawn-pivot-XXXXXX";
+        bool remove_pivot_tmp = false;
+        int r;
+
+        assert(directory);
+
+        if (!pivot_root_new)
+                return 0;
+
+        /* Pivot pivot_root_new to / and the existing / to pivot_root_old.
+         * If pivot_root_old is NULL, the existing / disappears.
+         * This requires a temporary directory, pivot_tmp, which is
+         * not a child of either.
+         *
+         * This is typically used for OSTree-style containers, where
+         * the root partition contains several sysroots which could be
+         * run. Normally, one would be chosen by the bootloader and
+         * pivoted to / by initramfs.
+         *
+         * For example, for an OSTree deployment, pivot_root_new
+         * would be: /ostree/deploy/$os/deploy/$checksum. Note that this
+         * code doesn't do the /var mount which OSTree expects: use
+         * --bind +/sysroot/ostree/deploy/$os/var:/var for that.
+         *
+         * So in the OSTree case, we'll end up with something like:
+         *  - directory = /tmp/nspawn-root-123456
+         *  - pivot_root_new = /ostree/deploy/os/deploy/123abc
+         *  - pivot_root_old = /sysroot
+         *  - directory_pivot_root_new =
+         *       /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc
+         *  - pivot_tmp = /tmp/nspawn-pivot-123456
+         *  - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot
+         *
+         * Requires all file systems at directory and below to be mounted
+         * MS_PRIVATE or MS_SLAVE so they can be moved.
+         */
+        directory_pivot_root_new = prefix_root(directory, pivot_root_new);
+        if (!directory_pivot_root_new)
+                /* Nothing has been created yet, so there is nothing to clean up. */
+                return log_oom();
+
+        /* Remount directory_pivot_root_new to make it movable. */
+        r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory_pivot_root_new, NULL, MS_BIND, NULL);
+        if (r < 0)
+                goto done;
+
+        if (pivot_root_old) {
+                if (!mkdtemp(pivot_tmp)) {
+                        r = log_error_errno(errno, "Failed to create temporary directory: %m");
+                        goto done;
+                }
+
+                remove_pivot_tmp = true;
+                pivot_tmp_pivot_root_old = prefix_root(pivot_tmp, pivot_root_old);
+                if (!pivot_tmp_pivot_root_old) {
+                        /* Go through "done" so that the temporary directory is removed again. */
+                        r = log_oom();
+                        goto done;
+                }
+
+                /* 1. Park the new root on the temporary directory. */
+                r = mount_verbose(LOG_ERR, directory_pivot_root_new, pivot_tmp, NULL, MS_MOVE, NULL);
+                if (r < 0)
+                        goto done;
+
+                /* 2. Move the old root to its destination inside the parked new root. */
+                r = mount_verbose(LOG_ERR, directory, pivot_tmp_pivot_root_old, NULL, MS_MOVE, NULL);
+                if (r < 0)
+                        goto done;
+
+                /* 3. Move the new root (now carrying the old one) back to where the container root lives. */
+                r = mount_verbose(LOG_ERR, pivot_tmp, directory, NULL, MS_MOVE, NULL);
+                if (r < 0)
+                        goto done;
+        } else {
+                /* No destination for the old root: move the new root straight on top of it. */
+                r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory, NULL, MS_MOVE, NULL);
+                if (r < 0)
+                        goto done;
+        }
+
+done:
+        /* Best effort only: the result of rmdir() is deliberately ignored. */
+        if (remove_pivot_tmp)
+                (void) rmdir(pivot_tmp);
+
+        return r;
+}
index 6b33fbff5766f8d25af445e9379932bb6c1f8021..2777d2169b7bf61bcc3f2dd94e2cf7a23f9b781a 100644 (file)
@@ -70,3 +70,6 @@ int mount_custom(const char *dest, CustomMount *mounts, unsigned n, bool userns,
 
 int setup_volatile(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
 int setup_volatile_state(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
+
+int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s);
+int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old);
index 22b74d88e46a84f907122ced2fa3af332a5128d7..5217d10665d7058c321a2de09d1db8c7d41531af 100644 (file)
@@ -90,6 +90,8 @@ Settings* settings_free(Settings *s) {
         strv_free(s->parameters);
         strv_free(s->environment);
         free(s->user);
+        free(s->pivot_root_new);
+        free(s->pivot_root_old);
         free(s->working_directory);
 
         strv_free(s->network_interfaces);
@@ -237,6 +239,34 @@ int config_parse_id128(
         return 0;
 }
 
+/* Configuration parser callback for the PivotRoot= setting.
+ *
+ * "data" is treated as the Settings object whose pivot_root_new/pivot_root_old fields receive the
+ * parsed value of rvalue. An invalid value is logged via log_syntax() and otherwise ignored, so
+ * this callback always returns 0. */
+int config_parse_pivot_root(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        Settings *s = data;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+
+        r = pivot_root_parse(&s->pivot_root_new, &s->pivot_root_old, rvalue);
+        if (r < 0)
+                /* Report the bad value, but do not fail parsing because of it. */
+                log_syntax(unit, LOG_ERR, filename, line, r, "Invalid pivot root mount specification %s: %m", rvalue);
+
+        return 0;
+}
+
 int config_parse_bind(
                 const char *unit,
                 const char *filename,
index e9ea0871914688564d6045df588e06af4a9c6b27..021403258ff674b20016e93d19b00959e0d45cba 100644 (file)
@@ -57,7 +57,8 @@ typedef enum SettingsMask {
         SETTING_WORKING_DIRECTORY = 1 << 12,
         SETTING_USERNS            = 1 << 13,
         SETTING_NOTIFY_READY      = 1 << 14,
-        _SETTINGS_MASK_ALL        = (1 << 15) -1
+        SETTING_PIVOT_ROOT        = 1 << 15,
+        _SETTINGS_MASK_ALL        = (1 << 16) -1
 } SettingsMask;
 
 typedef struct Settings {
@@ -72,6 +73,8 @@ typedef struct Settings {
         unsigned long personality;
         sd_id128_t machine_id;
         char *working_directory;
+        char *pivot_root_new;
+        char *pivot_root_old;
         UserNamespaceMode userns_mode;
         uid_t uid_shift, uid_range;
         bool notify_ready;
@@ -109,6 +112,7 @@ int config_parse_capability(const char *unit, const char *filename, unsigned lin
 int config_parse_id128(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_expose_port(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_volatile_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_pivot_root(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_bind(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_tmpfs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_overlay(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
index 5594b87efa0de18d961a74f92b421a3dd20f8ea5..a8d33ad907f3a0785fdf3845fb5bd59a90946593 100644 (file)
@@ -132,6 +132,8 @@ typedef enum LinkJournal {
 static char *arg_directory = NULL;
 static char *arg_template = NULL;
 static char *arg_chdir = NULL;
+static char *arg_pivot_root_new = NULL;
+static char *arg_pivot_root_old = NULL;
 static char *arg_user = NULL;
 static sd_id128_t arg_uuid = {};
 static char *arg_machine = NULL;
@@ -221,6 +223,8 @@ static void help(void) {
                "  -a --as-pid2              Maintain a stub init as PID1, invoke binary as PID2\n"
                "  -b --boot                 Boot up full system (i.e. invoke init)\n"
                "     --chdir=PATH           Set working directory in the container\n"
+               "     --pivot-root=PATH[:PATH]\n"
+               "                            Pivot root to given directory in the container\n"
                "  -u --user=USER            Run the command under specified user or uid\n"
                "  -M --machine=NAME         Set the machine name for the container\n"
                "     --uuid=UUID            Set a specific machine UUID for the container\n"
@@ -427,6 +431,7 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_KILL_SIGNAL,
                 ARG_SETTINGS,
                 ARG_CHDIR,
+                ARG_PIVOT_ROOT,
                 ARG_PRIVATE_USERS_CHOWN,
                 ARG_NOTIFY_READY,
                 ARG_ROOT_HASH,
@@ -478,6 +483,7 @@ static int parse_argv(int argc, char *argv[]) {
                 { "kill-signal",           required_argument, NULL, ARG_KILL_SIGNAL         },
                 { "settings",              required_argument, NULL, ARG_SETTINGS            },
                 { "chdir",                 required_argument, NULL, ARG_CHDIR               },
+                { "pivot-root",            required_argument, NULL, ARG_PIVOT_ROOT          },
                 { "notify-ready",          required_argument, NULL, ARG_NOTIFY_READY        },
                 { "root-hash",             required_argument, NULL, ARG_ROOT_HASH           },
                 {}
@@ -1012,6 +1018,14 @@ static int parse_argv(int argc, char *argv[]) {
                         arg_settings_mask |= SETTING_WORKING_DIRECTORY;
                         break;
 
+                case ARG_PIVOT_ROOT:
+                        r = pivot_root_parse(&arg_pivot_root_new, &arg_pivot_root_old, optarg);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse --pivot-root= argument %s: %m", optarg);
+
+                        arg_settings_mask |= SETTING_PIVOT_ROOT;
+                        break;
+
                 case ARG_NOTIFY_READY:
                         r = parse_boolean(optarg);
                         if (r < 0) {
@@ -2493,6 +2507,13 @@ static int outer_child(
         if (r < 0)
                 return r;
 
+        r = setup_pivot_root(
+                        directory,
+                        arg_pivot_root_new,
+                        arg_pivot_root_old);
+        if (r < 0)
+                return r;
+
         r = setup_volatile(
                         directory,
                         arg_volatile_mode,
@@ -2915,6 +2936,12 @@ static int load_settings(void) {
                 settings->parameters = NULL;
         }
 
+        if ((arg_settings_mask & SETTING_PIVOT_ROOT) == 0 &&
+            settings->pivot_root_new) {
+                free_and_replace(arg_pivot_root_new, settings->pivot_root_new);
+                free_and_replace(arg_pivot_root_old, settings->pivot_root_old);
+        }
+
         if ((arg_settings_mask & SETTING_WORKING_DIRECTORY) == 0 &&
             settings->working_directory) {
                 free(arg_chdir);
@@ -3915,6 +3942,8 @@ finish:
         free(arg_image);
         free(arg_machine);
         free(arg_user);
+        free(arg_pivot_root_new);
+        free(arg_pivot_root_old);
         free(arg_chdir);
         strv_free(arg_setenv);
         free(arg_network_bridge);