]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: add DBUS method to bind mount new nodes without service restart
authorLuca Boccassi <luca.boccassi@microsoft.com>
Thu, 30 Jul 2020 18:37:10 +0000 (19:37 +0100)
committerLuca Boccassi <luca.boccassi@microsoft.com>
Mon, 18 Jan 2021 17:24:05 +0000 (17:24 +0000)
Allow to setup new bind mounts for a service at runtime (via either
DBUS or a new 'systemctl bind' verb) with a new helper that forks into
the unit's mount namespace.
Add a new integration test to cover this.

Useful for zero-downtime addition to services that are running inside
mount namespaces, especially when using RootImage/RootDirectory.

If a service runs with a read-only root, a tmpfs is added on /run
to ensure we can create the airlock directory for incoming mounts
under /run/host/incoming.

29 files changed:
man/org.freedesktop.systemd1.xml
man/systemctl.xml
man/systemd.exec.xml
meson.build
shell-completion/bash/systemctl.in
shell-completion/zsh/_systemctl.in
src/core/dbus-manager.c
src/core/dbus-service.c
src/core/dbus-service.h
src/core/dbus-unit.c
src/core/dbus-util.c
src/core/dbus-util.h
src/core/execute.c
src/core/execute.h
src/core/namespace.c
src/core/namespace.h
src/core/org.freedesktop.systemd1.conf
src/systemctl/systemctl-mount.c [new file with mode: 0644]
src/systemctl/systemctl-mount.h [new file with mode: 0644]
src/systemctl/systemctl.c
src/systemctl/systemctl.h
src/test/test-namespace.c
src/test/test-ns.c
test/TEST-57-RUNTIME-BIND-PATHS/Makefile [new symlink]
test/TEST-57-RUNTIME-BIND-PATHS/test.sh [new file with mode: 0755]
test/units/testsuite-57-namespaced.service [new file with mode: 0644]
test/units/testsuite-57-non-namespaced.service [new file with mode: 0644]
test/units/testsuite-57.service [new file with mode: 0644]
test/units/testsuite-57.sh [new file with mode: 0755]

index 78fd0b3378d58c084c33836d82305cf8231e1c04..90d0d664147af798eab66aa454ec53e07c3cc1c2 100644 (file)
@@ -116,6 +116,11 @@ node /org/freedesktop/systemd1 {
       SetUnitProperties(in  s name,
                         in  b runtime,
                         in  a(sv) properties);
+      BindMountUnit(in  s name,
+                    in  s source,
+                    in  s destination,
+                    in  b read_only,
+                    in  b mkdir);
       RefUnit(in  s name);
       UnrefUnit(in  s name);
       StartTransientUnit(in  s name,
@@ -767,6 +772,8 @@ node /org/freedesktop/systemd1 {
 
     <variablelist class="dbus-method" generated="True" extra-ref="SetUnitProperties()"/>
 
+    <variablelist class="dbus-method" generated="True" extra-ref="BindMountUnit()"/>
+
     <variablelist class="dbus-method" generated="True" extra-ref="RefUnit()"/>
 
     <variablelist class="dbus-method" generated="True" extra-ref="UnrefUnit()"/>
@@ -1156,6 +1163,9 @@ node /org/freedesktop/systemd1 {
       the "Try" flavor is used in which case a service that isn't running is not affected by the restart. The
       "ReloadOrRestart" flavors attempt a reload if the unit supports it and use a restart otherwise.</para>
 
+      <para><function>BindMountUnit()</function> can be used to bind mount new files or directories into
+      a running service's mount namespace.</para>
+
       <para><function>KillUnit()</function> may be used to kill (i.e. send a signal to) all processes of a
       unit. It takes the unit <varname>name</varname>, an enum <varname>who</varname> and a UNIX
       <varname>signal</varname> number to send. The <varname>who</varname> enum is one of
@@ -2193,6 +2203,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
   interface org.freedesktop.systemd1.Service {
     methods:
+      BindMount(in  s source,
+                in  s destination,
+                in  b read_only,
+                in  b mkdir);
       GetProcesses(out a(sus) processes);
       AttachProcesses(in  s subcgroup,
                       in  au pids);
@@ -3252,6 +3266,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 
     <variablelist class="dbus-interface" generated="True" extra-ref="org.freedesktop.systemd1.Service"/>
 
+    <variablelist class="dbus-method" generated="True" extra-ref="BindMount()"/>
+
     <variablelist class="dbus-method" generated="True" extra-ref="GetProcesses()"/>
 
     <variablelist class="dbus-method" generated="True" extra-ref="AttachProcesses()"/>
@@ -3810,6 +3826,17 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 
     <!--End of Autogenerated section-->
 
+    <refsect2>
+      <title>Methods</title>
+
+      <para><function>BindMount()</function> implements the same operation as the respective method on the
+      <interfacename>Manager</interfacename> object (see above). However, this method operates on the service
+      object and hence does not take a unit name parameter. Invoking the methods directly on the Manager
+      object has the advantage of not requiring a <function>GetUnit()</function> call to get the unit object
+      for a specific unit name. Calling the methods on the Manager object is hence a round trip
+      optimization.</para>
+    </refsect2>
+
     <refsect2>
       <title>Properties</title>
 
index bb702cb078b3b9473fffa7fb23f3224441282b00..a954e8727b6e5fedc7891a9d3e91fa748a33a029 100644 (file)
@@ -550,6 +550,23 @@ Jan 12 10:46:45 example.com bluetoothd[8900]: gatt-time-server: Input/output err
           </listitem>
         </varlistentry>
 
+        <varlistentry>
+          <term><command>bind</command> <replaceable>UNIT</replaceable> <replaceable>PATH</replaceable> [<replaceable>PATH</replaceable>]</term>
+
+          <listitem><para>Bind mounts a file or directory from the host into the specified unit's view. The first path
+          argument is the source file or directory on the host, the second path argument is the destination file or
+          directory in the unit's view. When the latter is omitted, the destination path in the unit's view is the same as
+          the source path on the host. When combined with the <option>--read-only</option> switch, a read-only bind
+          mount is created. When combined with the <option>--mkdir</option> switch, the destination path is first created
+          before the mount is applied. Note that this option is currently only supported for units that run within a mount
+          namespace (e.g.: with <option>RootImage=</option>, <option>PrivateMounts=</option>, etc.). This command supports bind
+          mounting directories, regular files, device nodes, <constant>AF_UNIX</constant> socket nodes, as well as FIFOs.
+          The bind mount is ephemeral, and it is undone as soon as the current unit process exits.
+          Note that the namespace mentioned here, where the bind mount will be added to, is the one where the main service
+          process runs, as other processes run in distinct namespaces (e.g.: <option>ExecReload=</option>,
+          <option>ExecStartPre=</option>, etc.).</para></listitem>
+        </varlistentry>
+
         <varlistentry>
           <term><command>service-log-level</command> <replaceable>SERVICE</replaceable> [<replaceable>LEVEL</replaceable>]</term>
 
@@ -2246,6 +2263,21 @@ Jan 12 10:46:45 example.com bluetoothd[8900]: gatt-time-server: Input/output err
         </listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><option>--mkdir</option></term>
+
+        <listitem><para>When used with <command>bind</command>, creates the destination file or directory before
+        applying the bind mount. Note that even though the name of this option suggests that it is suitable only for
+        directories, this option also creates the destination file node to mount over if the object to mount is not
+        a directory, but a regular file, device node, socket or FIFO.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>--read-only</option></term>
+
+        <listitem><para>When used with <command>bind</command>, creates a read-only bind mount.</para></listitem>
+      </varlistentry>
+
       <xi:include href="user-system-options.xml" xpointer="host" />
       <xi:include href="user-system-options.xml" xpointer="machine" />
 
index 568839e0d9e3f77187ff7be25f4da2f9a519483d..9adb6a298e8e36c2fd69e497b78b585ab19e6aaf 100644 (file)
         the service with a private, minimal version of <filename>/dev/</filename>, combine this option with
         <varname>PrivateDevices=</varname>.</para>
 
+        <para>In order to allow propagating mounts at runtime in a safe manner, <filename>/run/systemd/propagate</filename>
+        on the host will be used to set up new mounts, and <filename>/run/systemd/incoming/</filename> in the private namespace
+        will be used as an intermediate step to store them before being moved to the final mount point.</para>
+
         <xi:include href="system-only.xml" xpointer="singular"/></listitem>
       </varlistentry>
 
index e360ec95c9a9f378898a73d2188aaf4cc8b4afee..2caf069602c32a9b8184078246d57b52ff784389 100644 (file)
@@ -2198,6 +2198,8 @@ public_programs += executable(
         'src/systemctl/systemctl-log-setting.h',
         'src/systemctl/systemctl-logind.c',
         'src/systemctl/systemctl-logind.h',
+        'src/systemctl/systemctl-mount.c',
+        'src/systemctl/systemctl-mount.h',
         'src/systemctl/systemctl-preset-all.c',
         'src/systemctl/systemctl-preset-all.h',
         'src/systemctl/systemctl-reset-failed.c',
index 4bd80948d6449734a1cfc2ed8a7df5fd304392b5..7e99ef4bf39fd6ebfba3cb3eb651277a52287fe8 100644 (file)
@@ -214,7 +214,7 @@ _systemctl () {
                              list-timers list-units list-unit-files poweroff
                              reboot rescue show-environment suspend get-default
                              is-system-running preset-all'
-        [FILE]='link switch-root'
+        [FILE]='link switch-root bind'
         [TARGETS]='set-default'
         [MACHINES]='list-machines'
         [LOG_LEVEL]='log-level'
index 10af6d5121e91e4d725406ce574f0304f676f2c0..c4ea78b3c100a8b6e128c47342bd7b8c116ca86b 100644 (file)
@@ -31,6 +31,7 @@
         "reset-failed:Reset failed state for all, one, or more units"
         "list-dependencies:Show unit dependency tree"
         "clean:Remove configuration, state, cache, logs or runtime data of units"
+        "bind:Bind mount a path from the host into a unit's namespace"
     )
 
     local -a machine_commands=(
@@ -378,6 +379,10 @@ done
         _files
     }
 
+(( $+functions[_systemctl_bind] )) || _systemctl_bind() {
+        _files
+    }
+
 # no systemctl completion for:
 #    [STANDALONE]='daemon-reexec daemon-reload default
 #                  emergency exit halt kexec list-jobs list-units
index 3e1d609aa37b32a48aab2765a8314ae768c5180a..4b88f0d9f0786d2905555f8583cc3f8d7a64ddaf 100644 (file)
@@ -16,6 +16,7 @@
 #include "dbus-job.h"
 #include "dbus-manager.h"
 #include "dbus-scope.h"
+#include "dbus-service.h"
 #include "dbus-unit.h"
 #include "dbus.h"
 #include "env-util.h"
@@ -725,6 +726,11 @@ static int method_set_unit_properties(sd_bus_message *message, void *userdata, s
         return method_generic_unit_operation(message, userdata, error, bus_unit_method_set_properties, GENERIC_UNIT_LOAD|GENERIC_UNIT_VALIDATE_LOADED);
 }
 
+static int method_bind_mount_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        /* Only add mounts on fully loaded units */
+        return method_generic_unit_operation(message, userdata, error, bus_service_method_bind_mount, GENERIC_UNIT_VALIDATE_LOADED);
+}
+
 static int method_ref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
         /* Only allow reffing of fully loaded units, and make sure reffing a unit loads it. */
         return method_generic_unit_operation(message, userdata, error, bus_unit_method_ref, GENERIC_UNIT_LOAD|GENERIC_UNIT_VALIDATE_LOADED);
@@ -2760,6 +2766,16 @@ const sd_bus_vtable bus_manager_vtable[] = {
                                  NULL,,
                                  method_set_unit_properties,
                                  SD_BUS_VTABLE_UNPRIVILEGED),
+        SD_BUS_METHOD_WITH_NAMES("BindMountUnit",
+                                 "sssbb",
+                                 SD_BUS_PARAM(name)
+                                 SD_BUS_PARAM(source)
+                                 SD_BUS_PARAM(destination)
+                                 SD_BUS_PARAM(read_only)
+                                 SD_BUS_PARAM(mkdir),
+                                 NULL,,
+                                 method_bind_mount_unit,
+                                 SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD_WITH_NAMES("RefUnit",
                                  "s",
                                  SD_BUS_PARAM(name),
index 64f9d4ab362502fc5924fcb4ca38d4e8053f743d..6df93e44a43b7a8ee47d39e4778d5812e09dcb3f 100644 (file)
 #include "dbus-manager.h"
 #include "dbus-service.h"
 #include "dbus-util.h"
+#include "execute.h"
 #include "exit-status.h"
 #include "fd-util.h"
 #include "fileio.h"
+#include "locale-util.h"
+#include "mount-util.h"
 #include "parse-util.h"
 #include "path-util.h"
+#include "selinux-access.h"
 #include "service.h"
 #include "signal-util.h"
 #include "string-util.h"
@@ -91,6 +95,79 @@ static int property_get_exit_status_set(
         return sd_bus_message_close_container(reply);
 }
 
+int bus_service_method_bind_mount(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        int read_only, make_file_or_directory;
+        const char *dest, *src, *propagate_directory;
+        Unit *u = userdata;
+        ExecContext *c;
+        pid_t unit_pid;
+        int r;
+
+        assert(message);
+        assert(u);
+
+        if (!MANAGER_IS_SYSTEM(u->manager))
+                return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Adding bind mounts at runtime is only supported for system managers.");
+
+        r = mac_selinux_unit_access_check(u, message, "start", error);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_read(message, "ssbb", &src, &dest, &read_only, &make_file_or_directory);
+        if (r < 0)
+                return r;
+
+        if (!path_is_absolute(src) || !path_is_normalized(src))
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path must be absolute and normalized.");
+
+        if (isempty(dest))
+                dest = src;
+        else if (!path_is_absolute(dest) || !path_is_normalized(dest))
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path must be absolute and normalized.");
+
+        r = bus_verify_manage_units_async_full(
+                        u,
+                        "bind-mount",
+                        CAP_SYS_ADMIN,
+                        N_("Authentication is required to bind mount on '$(unit)'."),
+                        true,
+                        message,
+                        error);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+        if (u->type != UNIT_SERVICE)
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit is not of type .service");
+
+        /* If it would be dropped at startup time, return an error. The context should always be available, but
+         * there's an assert in exec_needs_mount_namespace, so double-check just in case. */
+        c = unit_get_exec_context(u);
+        if (!c)
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Cannot access unit execution context");
+        if (path_startswith_strv(dest, c->inaccessible_paths))
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s is not accessible to this unit", dest);
+
+        /* Ensure that the unit was started in a private mount namespace */
+        if (!exec_needs_mount_namespace(c, NULL, unit_get_exec_runtime(u)))
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit not running in private mount namespace, cannot activate bind mount");
+
+        unit_pid = unit_main_pid(u);
+        if (unit_pid == 0 || !UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit is not running");
+
+        propagate_directory = strjoina("/run/systemd/propagate/", u->id);
+        r = bind_mount_in_namespace(unit_pid,
+                                    propagate_directory,
+                                    "/run/systemd/incoming/",
+                                    src, dest, read_only, make_file_or_directory);
+        if (r < 0)
+                return sd_bus_error_set_errnof(error, r, "Failed to mount %s on %s in unit's namespace: %m", src, dest);
+
+        return sd_bus_reply_method_return(message, NULL);
+}
+
 const sd_bus_vtable bus_service_vtable[] = {
         SD_BUS_VTABLE_START(0),
         SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Service, type), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -146,6 +223,16 @@ const sd_bus_vtable bus_service_vtable[] = {
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPost", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_EX_COMMAND_LIST_VTABLE("ExecStopPostEx", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
 
+        SD_BUS_METHOD_WITH_NAMES("BindMount",
+                                 "ssbb",
+                                 SD_BUS_PARAM(source)
+                                 SD_BUS_PARAM(destination)
+                                 SD_BUS_PARAM(read_only)
+                                 SD_BUS_PARAM(mkdir),
+                                 NULL,,
+                                 bus_service_method_bind_mount,
+                                 SD_BUS_VTABLE_UNPRIVILEGED),
+
         /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */
         SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_ratelimit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
         SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_ratelimit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
index 69311675c9bcb34e9ebc96a08a67aef87d6ba770..5b7b7b757be6b3ed4c19dd8ba5641b59282c1a17 100644 (file)
@@ -9,4 +9,5 @@
 extern const sd_bus_vtable bus_service_vtable[];
 
 int bus_service_set_property(Unit *u, const char *name, sd_bus_message *i, UnitWriteFlags flags, sd_bus_error *error);
+int bus_service_method_bind_mount(sd_bus_message *message, void *userdata, sd_bus_error *error);
 int bus_service_commit_properties(Unit *u);
index 427152a757b77fe35a8dea41c031324493478687..67cc58ee9e0666af8dbcb593b59a63a23758cace 100644 (file)
@@ -323,38 +323,6 @@ static int property_get_load_error(
         return sd_bus_message_append(reply, "(ss)", NULL, NULL);
 }
 
-static int bus_verify_manage_units_async_full(
-                Unit *u,
-                const char *verb,
-                int capability,
-                const char *polkit_message,
-                bool interactive,
-                sd_bus_message *call,
-                sd_bus_error *error) {
-
-        const char *details[9] = {
-                "unit", u->id,
-                "verb", verb,
-        };
-
-        if (polkit_message) {
-                details[4] = "polkit.message";
-                details[5] = polkit_message;
-                details[6] = "polkit.gettext_domain";
-                details[7] = GETTEXT_PACKAGE;
-        }
-
-        return bus_verify_polkit_async(
-                        call,
-                        capability,
-                        "org.freedesktop.systemd1.manage-units",
-                        details,
-                        interactive,
-                        UID_INVALID,
-                        &u->manager->polkit_registry,
-                        error);
-}
-
 static const char *const polkit_message_for_job[_JOB_TYPE_MAX] = {
         [JOB_START]       = N_("Authentication is required to start '$(unit)'."),
         [JOB_STOP]        = N_("Authentication is required to stop '$(unit)'."),
index d6223db305c15537dc42a99d6393f306d6f6d4df..2d22bc699a34893e2f5e5262fbf0962197fe32e2 100644 (file)
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */
 
+#include "bus-polkit.h"
 #include "bus-util.h"
 #include "dbus-util.h"
 #include "parse-util.h"
@@ -153,3 +154,35 @@ int bus_set_transient_usec_internal(
 
         return 1;
 }
+
+int bus_verify_manage_units_async_full(
+                Unit *u,
+                const char *verb,
+                int capability,
+                const char *polkit_message,
+                bool interactive,
+                sd_bus_message *call,
+                sd_bus_error *error) {
+
+        const char *details[9] = {
+                "unit", u->id,
+                "verb", verb,
+        };
+
+        if (polkit_message) {
+                details[4] = "polkit.message";
+                details[5] = polkit_message;
+                details[6] = "polkit.gettext_domain";
+                details[7] = GETTEXT_PACKAGE;
+        }
+
+        return bus_verify_polkit_async(
+                        call,
+                        capability,
+                        "org.freedesktop.systemd1.manage-units",
+                        details,
+                        interactive,
+                        UID_INVALID,
+                        &u->manager->polkit_registry,
+                        error);
+}
index 4e7c68e8439062a836b2c0a7513ab0fee4a37577..e35c632d378434fa5e7ebc409dc1bc7967f2eea2 100644 (file)
@@ -248,3 +248,4 @@ static inline int bus_set_transient_usec(Unit *u, const char *name, usec_t *p, s
 static inline int bus_set_transient_usec_fix_0(Unit *u, const char *name, usec_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error) {
         return bus_set_transient_usec_internal(u, name, p, true, message, flags, error);
 }
+int bus_verify_manage_units_async_full(Unit *u, const char *verb, int capability, const char *polkit_message, bool interactive, sd_bus_message *call, sd_bus_error *error);
index ee5f082783b484b826f17fe0dd77299dde4b7394..5f170db8d179924808a595f2fdcccdac89597bd2 100644 (file)
@@ -1987,13 +1987,12 @@ static int build_pass_environment(const ExecContext *c, char ***ret) {
         return 0;
 }
 
-static bool exec_needs_mount_namespace(
+bool exec_needs_mount_namespace(
                 const ExecContext *context,
                 const ExecParameters *params,
                 const ExecRuntime *runtime) {
 
         assert(context);
-        assert(params);
 
         if (context->root_image)
                 return true;
@@ -2035,7 +2034,7 @@ static bool exec_needs_mount_namespace(
                         return true;
 
                 for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
-                        if (!params->prefix[t])
+                        if (params && !params->prefix[t])
                                 continue;
 
                         if (!strv_isempty(context->directories[t].paths))
@@ -3115,7 +3114,7 @@ static int apply_mount_namespace(
         _cleanup_strv_free_ char **empty_directories = NULL;
         const char *tmp_dir = NULL, *var_tmp_dir = NULL;
         const char *root_dir = NULL, *root_image = NULL;
-        _cleanup_free_ char *creds_path = NULL;
+        _cleanup_free_ char *creds_path = NULL, *incoming_dir = NULL, *propagate_dir = NULL;
         NamespaceInfo ns_info;
         bool needs_sandboxing;
         BindMount *bind_mounts = NULL;
@@ -3192,6 +3191,15 @@ static int apply_mount_namespace(
                 }
         }
 
+        if (MANAGER_IS_SYSTEM(u->manager)) {
+                propagate_dir = path_join("/run/systemd/propagate/", u->id);
+                if (!propagate_dir)
+                        return -ENOMEM;
+                incoming_dir = strdup("/run/systemd/incoming");
+                if (!incoming_dir)
+                        return -ENOMEM;
+        }
+
         r = setup_namespace(root_dir, root_image, context->root_image_options,
                             &ns_info, context->read_write_paths,
                             needs_sandboxing ? context->read_only_paths : NULL,
@@ -3211,6 +3219,8 @@ static int apply_mount_namespace(
                             context->root_hash, context->root_hash_size, context->root_hash_path,
                             context->root_hash_sig, context->root_hash_sig_size, context->root_hash_sig_path,
                             context->root_verity,
+                            propagate_dir,
+                            incoming_dir,
                             DISSECT_IMAGE_DISCARD_ON_LOOP|DISSECT_IMAGE_RELAX_VAR_CHECK|DISSECT_IMAGE_FSCK,
                             error_path);
 
index da8d6ae27290f8b994e0c9b4fa8b2a57de57b345..2da4699df1869616b0d9054c576de0aab21f3363 100644 (file)
@@ -471,3 +471,5 @@ ExecDirectoryType exec_directory_type_from_string(const char *s) _pure_;
 
 const char* exec_resource_type_to_string(ExecDirectoryType i) _const_;
 ExecDirectoryType exec_resource_type_from_string(const char *s) _pure_;
+
+bool exec_needs_mount_namespace(const ExecContext *context, const ExecParameters *params, const ExecRuntime *runtime);
index 73a8fa73a4dcea19e4c97f2c4eff5f30c22af2ec..4b5519e11b295de5b23a0d36def764e7d7ea1d29 100644 (file)
@@ -1301,7 +1301,8 @@ static size_t namespace_calculate_mounts(
                 const char* tmp_dir,
                 const char* var_tmp_dir,
                 const char *creds_path,
-                const char* log_namespace) {
+                const char* log_namespace,
+                bool setup_propagate) {
 
         size_t protect_home_cnt;
         size_t protect_system_cnt =
@@ -1328,6 +1329,7 @@ static size_t namespace_calculate_mounts(
                 n_bind_mounts +
                 n_mount_images +
                 n_temporary_filesystems +
+                (setup_propagate ? 1 : 0) + /* /run/systemd/incoming */
                 ns_info->private_dev +
                 (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
                 (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) +
@@ -1487,6 +1489,8 @@ int setup_namespace(
                 size_t root_hash_sig_size,
                 const char *root_hash_sig_path,
                 const char *verity_data_path,
+                const char *propagate_dir,
+                const char *incoming_dir,
                 DissectImageFlags dissect_image_flags,
                 char **error_path) {
 
@@ -1495,13 +1499,16 @@ int setup_namespace(
         _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
         _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
         MountEntry *m = NULL, *mounts = NULL;
-        bool require_prefix = false;
+        bool require_prefix = false, setup_propagate = false;
         const char *root;
         size_t n_mounts;
         int r;
 
         assert(ns_info);
 
+        if (!isempty(propagate_dir) && !isempty(incoming_dir))
+                setup_propagate = true;
+
         if (mount_flags == 0)
                 mount_flags = MS_SHARED;
 
@@ -1585,7 +1592,8 @@ int setup_namespace(
                         n_mount_images,
                         tmp_dir, var_tmp_dir,
                         creds_path,
-                        log_namespace);
+                        log_namespace,
+                        setup_propagate);
 
         if (n_mounts > 0) {
                 m = mounts = new0(MountEntry, n_mounts);
@@ -1754,6 +1762,15 @@ int setup_namespace(
                         };
                 }
 
+                /* Will be used to add bind mounts at runtime */
+                if (setup_propagate)
+                        *(m++) = (MountEntry) {
+                                .source_const = propagate_dir,
+                                .path_const = incoming_dir,
+                                .mode = BIND_MOUNT,
+                                .read_only = true,
+                        };
+
                 assert(mounts + n_mounts == m);
 
                 /* Prepend the root directory where that's necessary */
@@ -1778,6 +1795,10 @@ int setup_namespace(
                 goto finish;
         }
 
+        /* Create the source directory to allow runtime propagation of mounts */
+        if (setup_propagate)
+                (void) mkdir_p(propagate_dir, 0600);
+
         /* Remount / as SLAVE so that nothing now mounted in the namespace
          * shows up in the parent */
         if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
@@ -1919,6 +1940,16 @@ int setup_namespace(
                 goto finish;
         }
 
+        /* bind_mount_in_namespace() will MS_MOVE into that directory, and that's only
+         * supported for non-shared mounts. This needs to happen after remounting / or it will fail. */
+        if (setup_propagate) {
+                r = mount(NULL, incoming_dir, NULL, MS_SLAVE, NULL);
+                if (r < 0) {
+                        log_error_errno(r, "Failed to remount %s with MS_SLAVE: %m", incoming_dir);
+                        goto finish;
+                }
+        }
+
         r = 0;
 
 finish:
index da0861c4061678652e3698a25437acb06a71be3a..91ee44cd517d6e36034a06b4b310c02a85cacdbd 100644 (file)
@@ -127,6 +127,8 @@ int setup_namespace(
                 size_t root_hash_sig_size,
                 const char *root_hash_sig_path,
                 const char *root_verity,
+                const char *propagate_dir,
+                const char *incoming_dir,
                 DissectImageFlags dissected_image_flags,
                 char **error_path);
 
index 8b32379835d604f1e8b1c5ff4201f19231e05c77..0cea4d2b02419b6378115d9819804c2d5643fb4f 100644 (file)
                        send_interface="org.freedesktop.systemd1.Manager"
                        send_member="ReloadOrTryRestartUnit"/>
 
+                <allow send_destination="org.freedesktop.systemd1"
+                       send_interface="org.freedesktop.systemd1.Manager"
+                       send_member="BindMountUnit"/>
+
                 <allow send_destination="org.freedesktop.systemd1"
                        send_interface="org.freedesktop.systemd1.Manager"
                        send_member="KillUnit"/>
                        send_interface="org.freedesktop.systemd1.Service"
                        send_member="AttachProcesses"/>
 
+                <allow send_destination="org.freedesktop.systemd1"
+                       send_interface="org.freedesktop.systemd1.Service"
+                       send_member="BindMount"/>
+
                 <!-- Managed via polkit or other criteria: org.freedesktop.systemd1.Scope interface -->
 
                 <allow send_destination="org.freedesktop.systemd1"
diff --git a/src/systemctl/systemctl-mount.c b/src/systemctl/systemctl-mount.c
new file mode 100644 (file)
index 0000000..513a876
--- /dev/null
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "systemctl-mount.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+/* Verb handler for "systemctl bind": mangles the unit name given in argv[1] and invokes the
+ * manager's BindMountUnit bus method with the two path arguments and the current values of
+ * arg_read_only and arg_mkdir. Returns 0 on success, or the negative error code of the first
+ * step that failed (bus acquisition, name mangling, or the method call itself). */
+int mount_bind(int argc, char *argv[], void *userdata) {
+        _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+        _cleanup_free_ char *unit = NULL;
+        /* NOTE(review): the verb may be invoked with only one path, in which case argv[3] is the
+         * NULL argv terminator — confirm the bus layer accepts NULL for an "s" argument. */
+        const char *source = argv[2], *destination = argv[3];
+        sd_bus *bus;
+        int r;
+
+        r = acquire_bus(BUS_MANAGER, &bus);
+        if (r < 0)
+                return r;
+
+        polkit_agent_open_maybe();
+
+        /* UNIT_NAME_MANGLE_WARN is dropped when arg_quiet is set */
+        r = unit_name_mangle(argv[1], arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN, &unit);
+        if (r < 0)
+                return log_error_errno(r, "Failed to mangle unit name: %m");
+
+        r = bus_call_method(bus, bus_systemd_mgr, "BindMountUnit", &error, NULL,
+                            "sssbb", unit, source, destination, arg_read_only, arg_mkdir);
+        if (r < 0)
+                return log_error_errno(r, "Failed to bind mount: %s", bus_error_message(&error, r));
+
+        return 0;
+}
diff --git a/src/systemctl/systemctl-mount.h b/src/systemctl/systemctl-mount.h
new file mode 100644 (file)
index 0000000..1f9b387
--- /dev/null
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int mount_bind(int argc, char *argv[], void *userdata);
index 7471fadd91308f190ced597d7c4fa786cabd7e18..4726f65f970b372d8491fd9b851274708c1ca9ec 100644 (file)
@@ -38,6 +38,7 @@
 #include "systemctl-list-units.h"
 #include "systemctl-log-setting.h"
 #include "systemctl-logind.h"
+#include "systemctl-mount.h"
 #include "systemctl-preset-all.h"
 #include "systemctl-reset-failed.h"
 #include "systemctl-service-watchdogs.h"
@@ -105,6 +106,8 @@ bool arg_jobs_before = false;
 bool arg_jobs_after = false;
 char **arg_clean_what = NULL;
 TimestampStyle arg_timestamp_style = TIMESTAMP_PRETTY;
+bool arg_read_only = false;
+bool arg_mkdir = false;
 
 STATIC_DESTRUCTOR_REGISTER(arg_wall, strv_freep);
 STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
@@ -157,6 +160,8 @@ static int systemctl_help(void) {
                "  freeze PATTERN...                   Freeze execution of unit processes\n"
                "  thaw PATTERN...                     Resume execution of a frozen unit\n"
                "  set-property UNIT PROPERTY=VALUE... Sets one or more properties of a unit\n"
+               "  bind UNIT PATH [PATH]               Bind-mount a path from the host into a\n"
+               "                                      unit's namespace\n"
                "  service-log-level SERVICE [LEVEL]   Get/set logging threshold for service\n"
                "  service-log-target SERVICE [TARGET] Get/set logging target for service\n"
                "  reset-failed [PATTERN...]           Reset failed state for all, one, or more\n"
@@ -286,6 +291,8 @@ static int systemctl_help(void) {
                "                         'us': 'Day YYYY-MM-DD HH:MM:SS.UUUUUU TZ\n"
                "                         'utc': 'Day YYYY-MM-DD HH:MM:SS UTC\n"
                "                         'us+utc': 'Day YYYY-MM-DD HH:MM:SS.UUUUUU UTC\n"
+               "     --read-only         Create read-only bind mount\n"
+               "     --mkdir             Create directory before bind-mounting, if missing\n"
                "\nSee the %2$s for details.\n"
                , program_invocation_short_name
                , link
@@ -401,6 +408,8 @@ static int systemctl_parse_argv(int argc, char *argv[]) {
                 ARG_WHAT,
                 ARG_REBOOT_ARG,
                 ARG_TIMESTAMP_STYLE,
+                ARG_READ_ONLY,
+                ARG_MKDIR,
         };
 
         static const struct option options[] = {
@@ -457,6 +466,8 @@ static int systemctl_parse_argv(int argc, char *argv[]) {
                 { "what",                required_argument, NULL, ARG_WHAT                },
                 { "reboot-argument",     required_argument, NULL, ARG_REBOOT_ARG          },
                 { "timestamp",           required_argument, NULL, ARG_TIMESTAMP_STYLE     },
+                { "read-only",           no_argument,       NULL, ARG_READ_ONLY           },
+                { "mkdir",               no_argument,       NULL, ARG_MKDIR               },
                 {}
         };
 
@@ -878,6 +889,14 @@ static int systemctl_parse_argv(int argc, char *argv[]) {
 
                         break;
 
+                case ARG_READ_ONLY:
+                        arg_read_only = true;
+                        break;
+
+                case ARG_MKDIR:
+                        arg_mkdir = true;
+                        break;
+
                 case '.':
                         /* Output an error mimicking getopt, and print a hint afterwards */
                         log_error("%s: invalid option -- '.'", program_invocation_name);
@@ -1045,6 +1064,7 @@ static int systemctl_main(int argc, char *argv[]) {
                 { "add-wants",             3,        VERB_ANY, 0,                add_dependency          },
                 { "add-requires",          3,        VERB_ANY, 0,                add_dependency          },
                 { "edit",                  2,        VERB_ANY, VERB_ONLINE_ONLY, edit                    },
+                { "bind",                  3,        4,        VERB_ONLINE_ONLY, mount_bind              },
                 {}
         };
 
index cdf56c72208197c180d239121d2fea433792fd71..34650ebb4435d1bd4d58a6fc182d927f35aba334 100644 (file)
@@ -90,3 +90,5 @@ extern bool arg_jobs_before;
 extern bool arg_jobs_after;
 extern char **arg_clean_what;
 extern TimestampStyle arg_timestamp_style;
+extern bool arg_read_only;
+extern bool arg_mkdir;
index e234f54deee8ed03d89c705e22a936e1507cc2ea..d92bcacfad304a9c604a87662bc2b40e113a58a7 100644 (file)
@@ -172,6 +172,8 @@ static void test_protect_kernel_logs(void) {
                                     0,
                                     NULL,
                                     NULL,
+                                    NULL,
+                                    NULL,
                                     0,
                                     NULL);
                 assert_se(r == 0);
index 6ec1cff28ad2ee82393415718353d573d53d24fb..88bdb437debc1dfbe5be2202949d4a4772b6a999 100644 (file)
@@ -87,6 +87,8 @@ int main(int argc, char *argv[]) {
                             0,
                             NULL,
                             NULL,
+                            NULL,
+                            NULL,
                             0,
                             NULL);
         if (r < 0) {
diff --git a/test/TEST-57-RUNTIME-BIND-PATHS/Makefile b/test/TEST-57-RUNTIME-BIND-PATHS/Makefile
new file mode 120000 (symlink)
index 0000000..e9f93b1
--- /dev/null
@@ -0,0 +1 @@
+../TEST-01-BASIC/Makefile
\ No newline at end of file
diff --git a/test/TEST-57-RUNTIME-BIND-PATHS/test.sh b/test/TEST-57-RUNTIME-BIND-PATHS/test.sh
new file mode 100755 (executable)
index 0000000..76d1b49
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+set -e
+
+TEST_DESCRIPTION="test adding new BindPaths while unit is already running"
+. $TEST_BASE_DIR/test-functions
+
+do_test "$@" 57
diff --git a/test/units/testsuite-57-namespaced.service b/test/units/testsuite-57-namespaced.service
new file mode 100644 (file)
index 0000000..0abcc4f
--- /dev/null
@@ -0,0 +1,12 @@
+[Service]
+RuntimeMaxSec=300
+# Adding new mounts at runtime works if the unit is in the active state,
+# so use Type=notify to make sure there's no race condition in the test
+Type=notify
+RemainAfterExit=yes
+MountAPIVFS=yes
+PrivateTmp=yes
+BindPaths=/run/testservice-57-fixed:/tmp/testfile_fixed
+InaccessiblePaths=/run/inaccessible
+ExecStartPre=grep -q -F MARKER_FIXED /tmp/testfile_fixed
+ExecStart=/bin/sh -c 'systemd-notify --ready; while ! grep -q -F MARKER_RUNTIME /tmp/testfile_runtime; do sleep 0.1; done; test ! -f /run/inaccessible/testfile_fixed'
diff --git a/test/units/testsuite-57-non-namespaced.service b/test/units/testsuite-57-non-namespaced.service
new file mode 100644 (file)
index 0000000..e86c64d
--- /dev/null
@@ -0,0 +1,5 @@
+[Service]
+RuntimeMaxSec=10
+Type=notify
+RemainAfterExit=yes
+ExecStart=/bin/sh -c 'systemd-notify --ready; while ! grep -q -F MARKER_RUNTIME /tmp/testfile_runtime; do sleep 0.1; done; exit 0'
diff --git a/test/units/testsuite-57.service b/test/units/testsuite-57.service
new file mode 100644 (file)
index 0000000..b6edd86
--- /dev/null
@@ -0,0 +1,7 @@
+[Unit]
+Description=TEST-57-RUNTIME-BIND-PATHS
+
+[Service]
+ExecStartPre=rm -f /failed /testok
+ExecStart=/usr/lib/systemd/tests/testdata/units/%N.sh
+Type=oneshot
diff --git a/test/units/testsuite-57.sh b/test/units/testsuite-57.sh
new file mode 100755 (executable)
index 0000000..c24cf32
--- /dev/null
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+set -ex
+
+echo "MARKER_FIXED" > /run/testservice-57-fixed
+mkdir -p /run/inaccessible
+
+systemctl start testsuite-57-namespaced.service
+
+# Ensure that inaccessible paths aren't bypassed by the runtime setup
+set +e
+systemctl bind --mkdir testsuite-57-namespaced.service /run/testservice-57-fixed /run/inaccessible/testfile_fixed && exit 1
+set -e
+
+echo "MARKER_RUNTIME" > /run/testservice-57-runtime
+
+systemctl bind --mkdir testsuite-57-namespaced.service /run/testservice-57-runtime /tmp/testfile_runtime
+
+while systemctl show -P SubState testsuite-57-namespaced.service | grep -q running
+do
+    sleep 0.1
+done
+
+systemctl is-active testsuite-57-namespaced.service
+
+# Now test that systemctl bind fails when attempted on a non-namespaced unit
+systemctl start testsuite-57-non-namespaced.service
+
+set +e
+systemctl bind --mkdir testsuite-57-non-namespaced.service /run/testservice-57-runtime /tmp/testfile_runtime && exit 1
+set -e
+
+while systemctl show -P SubState testsuite-57-non-namespaced.service | grep -q running
+do
+    sleep 0.1
+done
+
+set +e
+systemctl is-active testsuite-57-non-namespaced.service && exit 1
+set -e
+
+echo OK > /testok
+
+exit 0