]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: be more lenient when checking whether sandboxing is necessary
authorLennart Poettering <lennart@poettering.net>
Wed, 20 Nov 2019 11:23:17 +0000 (12:23 +0100)
committerLennart Poettering <lennart@poettering.net>
Wed, 20 Nov 2019 11:30:04 +0000 (12:30 +0100)
In some containers unshare() is made unavailable entirely. Let's deal
with this that more gracefully and disable our sandboxing of services
then, so that we work in a container, under the assumption the container
manager is then responsible for sandboxing if we can't do it ourselves.

Previously, we'd insist on sandboxing as soon as any form of BindPath=
is used. With this change we only insist on it if we have a setting like
that where source and destination differ, i.e. there's a mapping
established that actually rearranges things, and thus would result in
systematically different behaviour if skipped (as opposed to mappings
that just make stuff read-only/writable that otherwise arent').

(Let's also update a test that intended to test for this behaviour with
a more specific configuration that still triggers the behaviour with
this change in place)

Fixes: #13955
(For testing purposes unshare() can easily be blocked with
systemd-nspawn --system-call-filter=~unshare.)

src/core/execute.c
test/test-execute/exec-readonlypaths-with-bindpaths.service

index 8ab4b18dc70e42abe60d2ac4bb8aeead37dff5f6..def73977fc1878f4cb25f4c8ae4dc8cc390a2927 100644 (file)
@@ -2459,6 +2459,40 @@ finish:
         return r;
 }
 
+static bool insist_on_sandboxing(
+                const ExecContext *context,
+                const char *root_dir,
+                const char *root_image,
+                const BindMount *bind_mounts,
+                size_t n_bind_mounts) {
+
+        size_t i;
+
+        assert(context);
+        assert(n_bind_mounts == 0 || bind_mounts);
+
+        /* Checks whether we need to insist on fs namespacing. i.e. whether we have settings configured that
+         * would alter the view on the file system beyond making things read-only or invisble, i.e. would
+         * rearrange stuff in a way we cannot ignore gracefully. */
+
+        if (context->n_temporary_filesystems > 0)
+                return true;
+
+        if (root_dir || root_image)
+                return true;
+
+        if (context->dynamic_user)
+                return true;
+
+        /* If there are any bind mounts set that don't map back onto themselves, fs namespacing becomes
+         * essential. */
+        for (i = 0; i < n_bind_mounts; i++)
+                if (!path_equal(bind_mounts[i].source, bind_mounts[i].destination))
+                        return true;
+
+        return false;
+}
+
 static int apply_mount_namespace(
                 const Unit *u,
                 const ExecCommand *command,
@@ -2545,28 +2579,28 @@ static int apply_mount_namespace(
                             DISSECT_IMAGE_DISCARD_ON_LOOP,
                             error_path);
 
-        bind_mount_free_many(bind_mounts, n_bind_mounts);
-
         /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
          * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
          * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
          * completely different execution environment. */
         if (r == -ENOANO) {
-                if (n_bind_mounts == 0 &&
-                    context->n_temporary_filesystems == 0 &&
-                    !root_dir && !root_image &&
-                    !context->dynamic_user) {
+                if (insist_on_sandboxing(
+                                    context,
+                                    root_dir, root_image,
+                                    bind_mounts,
+                                    n_bind_mounts)) {
+                        log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
+                                       "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
+                                       n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
+
+                        r = -EOPNOTSUPP;
+                } else {
                         log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
-                        return 0;
+                        r = 0;
                 }
-
-                log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
-                               "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
-                               n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
-
-                return -EOPNOTSUPP;
         }
 
+        bind_mount_free_many(bind_mounts, n_bind_mounts);
         return r;
 }
 
index ea9211395dbf9e2b88d40d2f822b0bc0ec02673a..438c7de70471fc8fef487176f694fe5fe8b62998 100644 (file)
@@ -3,7 +3,6 @@ Description=Test for ReadOnlyPaths=
 
 [Service]
 ReadOnlyPaths=/etc -/i-dont-exist /usr
-# From 6c47cd7d3bf35c8158a0737f34fe2c5dc95e72d6, RuntimeDirectory= implies BindPaths=.
-RuntimeDirectory=foo
+BindPaths=/etc:/tmp/etc2
 ExecStart=/bin/sh -x -c 'test ! -w /etc && test ! -w /usr && test ! -e /i-dont-exist && test -w /var'
 Type=oneshot