]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: add RootImage= setting for using a specific image file as root directory for...
authorLennart Poettering <lennart@poettering.net>
Fri, 23 Dec 2016 13:26:05 +0000 (14:26 +0100)
committerLennart Poettering <lennart@poettering.net>
Tue, 7 Feb 2017 11:19:42 +0000 (12:19 +0100)
This is similar to RootDirectory= but mounts the root file system from a
block device or loopback file instead of another directory.

This reuses the image dissector code now used by nspawn and
gpt-auto-discovery.

man/systemd.exec.xml
src/core/dbus-execute.c
src/core/execute.c
src/core/execute.h
src/core/load-fragment-gperf.gperf.m4
src/core/namespace.c
src/core/namespace.h
src/core/unit.c
src/shared/bus-unit-util.c
src/test/test-ns.c

index e594dc1b0cdcb9f99bb44dd1361d6573f790b79c..09e78c678676ea529baa692e2d72f9f09e7b20a1 100644 (file)
     <para>A few execution parameters result in additional, automatic
     dependencies to be added.</para>
 
-    <para>Units with <varname>WorkingDirectory=</varname> or
-    <varname>RootDirectory=</varname> set automatically gain
-    dependencies of type <varname>Requires=</varname> and
-    <varname>After=</varname> on all mount units required to access
-    the specified paths. This is equivalent to having them listed
-    explicitly in <varname>RequiresMountsFor=</varname>.</para>
+    <para>Units with <varname>WorkingDirectory=</varname>, <varname>RootDirectory=</varname> or
+    <varname>RootImage=</varname> set automatically gain dependencies of type <varname>Requires=</varname> and
+    <varname>After=</varname> on all mount units required to access the specified paths. This is equivalent to having
+    them listed explicitly in <varname>RequiresMountsFor=</varname>.</para>
 
     <para>Similarly, units with <varname>PrivateTmp=</varname> enabled automatically get mount unit dependencies for all
     mounts required to access <filename>/tmp</filename> and <filename>/var/tmp</filename>. They will also gain an
         <varname>User=</varname> is used. If not set, defaults to the root directory when systemd is running as a
         system instance and the respective user's home directory if run as user. If the setting is prefixed with the
         <literal>-</literal> character, a missing working directory is not considered fatal. If
-        <varname>RootDirectory=</varname> is not set, then <varname>WorkingDirectory=</varname> is relative to the root
-        of the system running the service manager.  Note that setting this parameter might result in additional
-        dependencies to be added to the unit (see above).</para></listitem>
+        <varname>RootDirectory=</varname>/<varname>RootImage=</varname> is not set, then
+        <varname>WorkingDirectory=</varname> is relative to the root of the system running the service manager.  Note
+        that setting this parameter might result in additional dependencies to be added to the unit (see
+        above).</para></listitem>
       </varlistentry>
 
       <varlistentry>
         in conjunction with <varname>RootDirectory=</varname>. For details, see below.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>RootImage=</varname></term>
+        <listitem><para>Takes a path to a block device node or regular file as argument. This setting is similar to
+        <varname>RootDirectory=</varname>, but mounts a file system hierarchy from a block device node or loopback
+        file instead of a directory. The device node or file system image file needs to contain a file system without a
+        partition table, or a file system within an MBR/MS-DOS or GPT partition table with only a single
+        Linux-compatible partition, or a set of file systems within a GPT partition table that follows the <ulink
+        url="http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/">Discoverable Partitions
+        Specification</ulink>.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>MountAPIVFS=</varname></term>
 
         <listitem><para>Takes a boolean argument. If on, a private mount namespace for the unit's processes is created
         and the API file systems <filename>/proc</filename>, <filename>/sys</filename> and <filename>/dev</filename>
         will be mounted inside of it, unless they are already mounted. Note that this option has no effect unless used
-        in conjunction with <varname>RootDirectory=</varname> as these three mounts are generally mounted in the host
+        in conjunction with <varname>RootDirectory=</varname>/<varname>RootImage=</varname> as these three mounts are generally mounted in the host
         anyway, and unless the root directory is changed the private mount namespace will be a 1:1 copy of the host's,
         and include these three mounts. Note that the <filename>/dev</filename> file system of the host is bind mounted
         if this option is used without <varname>PrivateDevices=</varname>. To run the service with a private, minimal
         access a process might have to the file system hierarchy. Each setting takes a space-separated list of paths
         relative to the host's root directory (i.e. the system running the service manager).  Note that if paths
         contain symlinks, they are resolved relative to the root directory set with
-        <varname>RootDirectory=</varname>.</para>
+        <varname>RootDirectory=</varname>/<varname>RootImage=</varname>.</para>
 
         <para>Paths listed in <varname>ReadWritePaths=</varname> are accessible from within the namespace with the same
         access modes as from outside of it. Paths listed in <varname>ReadOnlyPaths=</varname> are accessible for
         <para>Paths in <varname>ReadWritePaths=</varname>, <varname>ReadOnlyPaths=</varname> and
         <varname>InaccessiblePaths=</varname> may be prefixed with <literal>-</literal>, in which case they will be
         ignored when they do not exist. If prefixed with <literal>+</literal> the paths are taken relative to the root
-        directory of the unit, as configured with <varname>RootDirectory=</varname>, instead of relative to the root
-        directory of the host (see above). When combining <literal>-</literal> and <literal>+</literal> on the same
-        path make sure to specify <literal>-</literal> first, and <literal>+</literal> second.</para>
+        directory of the unit, as configured with <varname>RootDirectory=</varname>/<varname>RootImage=</varname>,
+        instead of relative to the root directory of the host (see above). When combining <literal>-</literal> and
+        <literal>+</literal> on the same path make sure to specify <literal>-</literal> first, and <literal>+</literal>
+        second.</para>
 
         <para>Note that using this setting will disconnect propagation of mounts from the service to the host
         (propagation in the opposite direction continues to work). This means that this setting may not be used for
         that in this case both read-only and regular bind mounts are reset, regardless of which of the two settings is
         used.</para>
 
-        <para>This option is particularly useful when <varname>RootDirectory=</varname> is used. In this case the
-        source path refers to a path on the host file system, while the destination path refers to a path below the
-        root directory of the unit.</para></listitem>
+        <para>This option is particularly useful when <varname>RootDirectory=</varname>/<varname>RootImage=</varname>
+        is used. In this case the source path refers to a path on the host file system, while the destination path
+        refers to a path below the root directory of the unit.</para></listitem>
       </varlistentry>
 
       <varlistentry>
         such as <varname>CapabilityBoundingSet=</varname> will affect only the latter, and there's no way to acquire
         additional capabilities in the host's user namespace. Defaults to off.</para>
 
-        <para>This setting is particularly useful in conjunction with <varname>RootDirectory=</varname>, as the need to
-        synchronize the user and group databases in the root directory and on the host is reduced, as the only users
-        and groups who need to be matched are <literal>root</literal>, <literal>nobody</literal> and the unit's own
-        user and group.</para></listitem>
+        <para>This setting is particularly useful in conjunction with
+        <varname>RootDirectory=</varname>/<varname>RootImage=</varname>, as the need to synchronize the user and group
+        databases in the root directory and on the host is reduced, as the only users and groups who need to be matched
+        are <literal>root</literal>, <literal>nobody</literal> and the unit's own user and group.</para></listitem>
       </varlistentry>
 
       <varlistentry>
index c57af5aaafa875605473ab25442b9a4c9c538413..7df4cab3f6f250e0d93f1c7ca5d9ce20d6331d06 100644 (file)
@@ -758,6 +758,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("LimitRTTIMESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("WorkingDirectory", "s", property_get_working_directory, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RootDirectory", "s", NULL, offsetof(ExecContext, root_directory), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("RootImage", "s", NULL, offsetof(ExecContext, root_image), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("OOMScoreAdjust", "i", property_get_oom_score_adjust, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Nice", "i", property_get_nice, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("IOScheduling", "i", property_get_ioprio, 0, SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1048,7 +1049,7 @@ int bus_exec_context_set_transient_property(
 
                 return 1;
 
-        } else if (STR_IN_SET(name, "TTYPath", "RootDirectory")) {
+        } else if (STR_IN_SET(name, "TTYPath", "RootDirectory", "RootImage")) {
                 const char *s;
 
                 r = sd_bus_message_read(message, "s", &s);
@@ -1061,6 +1062,8 @@ int bus_exec_context_set_transient_property(
                 if (mode != UNIT_CHECK) {
                         if (streq(name, "TTYPath"))
                                 r = free_and_strdup(&c->tty_path, s);
+                        else if (streq(name, "RootImage"))
+                                r = free_and_strdup(&c->root_image, s);
                         else {
                                 assert(streq(name, "RootDirectory"));
                                 r = free_and_strdup(&c->root_directory, s);
index 54f6418c5a8e45a11656a35c1a951958541ca90c..f57eb26388718280aa87155b8aa0c3d34b933932 100644 (file)
@@ -1640,6 +1640,9 @@ static bool exec_needs_mount_namespace(
         assert(context);
         assert(params);
 
+        if (context->root_image)
+                return true;
+
         if (!strv_isempty(context->read_write_paths) ||
             !strv_isempty(context->read_only_paths) ||
             !strv_isempty(context->inaccessible_paths))
@@ -1938,7 +1941,7 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,
         int r;
         _cleanup_strv_free_ char **rw = NULL;
         char *tmp = NULL, *var = NULL;
-        const char *root_dir = NULL;
+        const char *root_dir = NULL, *root_image = NULL;
         NameSpaceInfo ns_info = {
                 .ignore_protect_paths = false,
                 .private_dev = context->private_devices,
@@ -1965,8 +1968,12 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,
         if (r < 0)
                 return r;
 
-        if (params->flags & EXEC_APPLY_CHROOT)
-                root_dir = context->root_directory;
+        if (params->flags & EXEC_APPLY_CHROOT) {
+                root_image = context->root_image;
+
+                if (!root_image)
+                        root_dir = context->root_directory;
+        }
 
         /*
          * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
@@ -1976,7 +1983,8 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,
         if (!context->dynamic_user && root_dir)
                 ns_info.ignore_protect_paths = true;
 
-        r = setup_namespace(root_dir, &ns_info, rw,
+        r = setup_namespace(root_dir, root_image,
+                            &ns_info, rw,
                             context->read_only_paths,
                             context->inaccessible_paths,
                             context->bind_mounts,
@@ -1985,7 +1993,8 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,
                             var,
                             context->protect_home,
                             context->protect_system,
-                            context->mount_flags);
+                            context->mount_flags,
+                            DISSECT_IMAGE_DISCARD_ON_LOOP);
 
         /* If we couldn't set up the namespace this is probably due to a
          * missing capability. In this case, silently proceed. */
@@ -1999,10 +2008,12 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,
         return r;
 }
 
-static int apply_working_directory(const ExecContext *context,
-                                   const ExecParameters *params,
-                                   const char *home,
-                                   const bool needs_mount_ns) {
+static int apply_working_directory(
+                const ExecContext *context,
+                const ExecParameters *params,
+                const char *home,
+                const bool needs_mount_ns) {
+
         const char *d;
         const char *wd;
 
@@ -2983,6 +2994,7 @@ void exec_context_done(ExecContext *c) {
 
         c->working_directory = mfree(c->working_directory);
         c->root_directory = mfree(c->root_directory);
+        c->root_image = mfree(c->root_image);
         c->tty_path = mfree(c->tty_path);
         c->syslog_identifier = mfree(c->syslog_identifier);
         c->user = mfree(c->user);
@@ -3320,6 +3332,9 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 prefix, yes_no(c->memory_deny_write_execute),
                 prefix, yes_no(c->restrict_realtime));
 
+        if (c->root_image)
+                fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
+
         STRV_FOREACH(e, c->environment)
                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
 
index 6fd5a6e5ce783c8af77da40e63e91a5764b964f7..9f2b6fd39e334111c3996ba43cc4449c963c1212 100644 (file)
@@ -106,7 +106,7 @@ struct ExecContext {
         char **pass_environment;
 
         struct rlimit *rlimit[_RLIMIT_MAX];
-        char *working_directory, *root_directory;
+        char *working_directory, *root_directory, *root_image;
         bool working_directory_missing_ok;
         bool working_directory_home;
 
index 07f2a70c8f81739c0129af165ccf60f83f064f30..cb9e6fea2788244b99912309afce568571720ba5 100644 (file)
@@ -19,6 +19,7 @@ m4_dnl Define the context options only once
 m4_define(`EXEC_CONTEXT_CONFIG_ITEMS',
 `$1.WorkingDirectory,            config_parse_working_directory,     0,                             offsetof($1, exec_context)
 $1.RootDirectory,                config_parse_unit_path_printf,      0,                             offsetof($1, exec_context.root_directory)
+$1.RootImage,                    config_parse_unit_path_printf,      0,                             offsetof($1, exec_context.root_image)
 $1.User,                         config_parse_user_group,            0,                             offsetof($1, exec_context.user)
 $1.Group,                        config_parse_user_group,            0,                             offsetof($1, exec_context.group)
 $1.SupplementaryGroups,          config_parse_user_group_strv,       0,                             offsetof($1, exec_context.supplementary_groups)
index 10917f7b70ebae7a267a1c28029577ed1e7c1609..0ae5f704c77eed7019d3d91b292ae457e027b290 100644 (file)
@@ -30,6 +30,7 @@
 #include "dev-setup.h"
 #include "fd-util.h"
 #include "fs-util.h"
+#include "loop-util.h"
 #include "loopback-setup.h"
 #include "missing.h"
 #include "mkdir.h"
@@ -867,6 +868,7 @@ static unsigned namespace_calculate_mounts(
 
 int setup_namespace(
                 const char* root_directory,
+                const char* root_image,
                 const NameSpaceInfo *ns_info,
                 char** read_write_paths,
                 char** read_only_paths,
@@ -877,16 +879,46 @@ int setup_namespace(
                 const char* var_tmp_dir,
                 ProtectHome protect_home,
                 ProtectSystem protect_system,
-                unsigned long mount_flags) {
+                unsigned long mount_flags,
+                DissectImageFlags dissect_image_flags) {
 
+        _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
+        _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
         MountEntry *m, *mounts = NULL;
         bool make_slave = false;
         unsigned n_mounts;
         int r = 0;
 
+        assert(ns_info);
+
         if (mount_flags == 0)
                 mount_flags = MS_SHARED;
 
+        if (root_image) {
+                dissect_image_flags |= DISSECT_IMAGE_REQUIRE_ROOT;
+
+                if (protect_system == PROTECT_SYSTEM_STRICT && strv_isempty(read_write_paths))
+                        dissect_image_flags |= DISSECT_IMAGE_READ_ONLY;
+
+                r = loop_device_make_by_path(root_image,
+                                             dissect_image_flags & DISSECT_IMAGE_READ_ONLY ? O_RDONLY : O_RDWR,
+                                             &loop_device);
+                if (r < 0)
+                        return r;
+
+                r = dissect_image(loop_device->fd, NULL, 0, dissect_image_flags, &dissected_image);
+                if (r < 0)
+                        return r;
+
+                if (!root_directory) {
+                        /* Create a mount point for the image, if it's still missing. We use the same mount point for
+                         * all images, which is safe, since they all live in their own namespaces after all, and hence
+                         * won't see each other. */
+                        root_directory = "/run/systemd/unit-root";
+                        (void) mkdir(root_directory, 0700);
+                }
+        }
+
         n_mounts = namespace_calculate_mounts(
                         ns_info,
                         read_write_paths,
@@ -1001,7 +1033,15 @@ int setup_namespace(
                 }
         }
 
-        if (root_directory) {
+        if (root_image) {
+                r = dissected_image_mount(dissected_image, root_directory, dissect_image_flags);
+                if (r < 0)
+                        goto finish;
+
+                loop_device_relinquish(loop_device);
+
+        } else if (root_directory) {
+
                 /* Turn directory into bind mount, if it isn't one yet */
                 r = path_is_mount_point(root_directory, NULL, AT_SYMLINK_FOLLOW);
                 if (r < 0)
index bb9de9857c2cb7f441123a5a955178dd8ce152fe..f54954bd8645ca40e4953b29e08718be6e38a409 100644 (file)
@@ -25,6 +25,7 @@ typedef struct BindMount BindMount;
 
 #include <stdbool.h>
 
+#include "dissect-image.h"
 #include "macro.h"
 
 typedef enum ProtectHome {
@@ -63,6 +64,7 @@ struct BindMount {
 
 int setup_namespace(
                 const char *root_directory,
+                const char *root_image,
                 const NameSpaceInfo *ns_info,
                 char **read_write_paths,
                 char **read_only_paths,
@@ -73,7 +75,8 @@ int setup_namespace(
                 const char *var_tmp_dir,
                 ProtectHome protect_home,
                 ProtectSystem protect_system,
-                unsigned long mount_flags);
+                unsigned long mount_flags,
+                DissectImageFlags dissected_image_flags);
 
 int setup_tmp_dirs(
                 const char *id,
index 44f1d5e206ed690782f30178bed5aed2aad9d897..90d7eea95629d27e59cc80e339c89627da9f391a 100644 (file)
@@ -862,6 +862,12 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
                         return r;
         }
 
+        if (c->root_image) {
+                r = unit_require_mounts_for(u, c->root_image);
+                if (r < 0)
+                        return r;
+        }
+
         if (!MANAGER_IS_SYSTEM(u->manager))
                 return 0;
 
index a4677bef271fcc060b7b62fd79f72b5f338c0207..20c1085697ae6c6b01607700fdd8906a9156b8ad 100644 (file)
@@ -266,7 +266,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
                               "StandardInput", "StandardOutput", "StandardError",
                               "Description", "Slice", "Type", "WorkingDirectory",
                               "RootDirectory", "SyslogIdentifier", "ProtectSystem",
-                              "ProtectHome", "SELinuxContext", "Restart"))
+                              "ProtectHome", "SELinuxContext", "Restart", "RootImage"))
                 r = sd_bus_message_append(m, "v", "s", eq);
 
         else if (streq(field, "SyslogLevel")) {
index c99bcb371b99b545be5303ee1f65cb24bfe2efee..0125d905a633d18f259452ebd9735e06d5849aff 100644 (file)
@@ -77,6 +77,7 @@ int main(int argc, char *argv[]) {
                 log_info("Not chrooted");
 
         r = setup_namespace(root_directory,
+                            NULL,
                             &ns_info,
                             (char **) writable,
                             (char **) readonly,
@@ -86,6 +87,7 @@ int main(int argc, char *argv[]) {
                             var_tmp_dir,
                             PROTECT_HOME_NO,
                             PROTECT_SYSTEM_NO,
+                            0,
                             0);
         if (r < 0) {
                 log_error_errno(r, "Failed to setup namespace: %m");