git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: Use a subdirectory of /run/ for PrivateDevices= 29343/head
author: Daan De Meyer <daan.j.demeyer@gmail.com>
Wed, 27 Sep 2023 08:52:50 +0000 (10:52 +0200)
committer: Daan De Meyer <daan.j.demeyer@gmail.com>
Mon, 2 Oct 2023 10:23:13 +0000 (12:23 +0200)
When we're starting early boot services such as systemd-userdbd.service,
/tmp might not yet be mounted, so let's use a directory in /run instead
which is guaranteed to be available.

src/core/execute.c
src/core/namespace.c
src/core/namespace.h
src/test/test-namespace.c
src/test/test-ns.c

index a52df64d010812c138ce344162eeb980fa306f4a..89c3868d55f6c8eefb21f52117e65dfa9172ebdc 100644 (file)
@@ -3307,6 +3307,7 @@ static int apply_mount_namespace(
                         extension_dir,
                         root_dir || root_image ? params->notify_socket : NULL,
                         host_os_release_stage,
+                        params->runtime_scope,
                         error_path);
 
         /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
index d1153f76901400d15c61153694aed5aac0d77046..a0471ac8884bf85b9ca7fe06092a124d2c18b8d7 100644 (file)
@@ -909,7 +909,19 @@ add_symlink:
         return 0;
 }
 
-static int mount_private_dev(MountEntry *m) {
+static char *settle_runtime_dir(RuntimeScope scope) { /* Returns a newly allocated runtime directory path for the given scope, or NULL if allocation fails; the caller owns and frees the string. */
+        char *runtime_dir;
+
+        if (scope != RUNTIME_SCOPE_USER)
+                return strdup("/run/"); /* every scope other than the user scope uses /run/ */
+
+        if (asprintf(&runtime_dir, "/run/user/" UID_FMT, geteuid()) < 0) /* user scope: /run/user/<effective UID of this process> */
+                return NULL;
+
+        return runtime_dir;
+}
+
+static int mount_private_dev(MountEntry *m, RuntimeScope scope) {
         static const char devnodes[] =
                 "/dev/null\0"
                 "/dev/zero\0"
@@ -918,13 +930,21 @@ static int mount_private_dev(MountEntry *m) {
                 "/dev/urandom\0"
                 "/dev/tty\0";
 
-        char temporary_mount[] = "/tmp/namespace-dev-XXXXXX";
+        _cleanup_free_ char *runtime_dir = NULL, *temporary_mount = NULL;
         const char *dev = NULL, *devpts = NULL, *devshm = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL;
         bool can_mknod = true;
         int r;
 
         assert(m);
 
+        runtime_dir = settle_runtime_dir(scope);
+        if (!runtime_dir)
+                return log_oom_debug();
+
+        temporary_mount = path_join(runtime_dir, "systemd/namespace-dev-XXXXXX");
+        if (!temporary_mount)
+                return log_oom_debug();
+
         if (!mkdtemp(temporary_mount))
                 return log_debug_errno(errno, "Failed to create temporary directory '%s': %m", temporary_mount);
 
@@ -1364,7 +1384,8 @@ static int apply_one_mount(
                 MountEntry *m,
                 const ImagePolicy *mount_image_policy,
                 const ImagePolicy *extension_image_policy,
-                const NamespaceInfo *ns_info) {
+                const NamespaceInfo *ns_info,
+                RuntimeScope scope) {
 
         _cleanup_free_ char *inaccessible = NULL;
         bool rbind = true, make = false;
@@ -1379,8 +1400,7 @@ static int apply_one_mount(
         switch (m->mode) {
 
         case INACCESSIBLE: {
-                _cleanup_free_ char *tmp = NULL;
-                const char *runtime_dir;
+                _cleanup_free_ char *runtime_dir = NULL;
                 struct stat target;
 
                 /* First, get rid of everything that is below if there
@@ -1396,14 +1416,14 @@ static int apply_one_mount(
                                                mount_entry_path(m));
                 }
 
-                if (geteuid() == 0)
-                        runtime_dir = "/run";
-                else {
-                        if (asprintf(&tmp, "/run/user/" UID_FMT, geteuid()) < 0)
-                                return -ENOMEM;
-
-                        runtime_dir = tmp;
-                }
+                /* We don't pass the literal runtime scope through here but one based purely on our UID. This
+                 * means that the root user's --user services will use the host's inaccessible inodes rather
+                 * than root's private ones. This is preferable since it means device nodes that are
+                 * overmounted to make them inaccessible will be overmounted with a device node, rather than
+                 * an AF_UNIX socket inode. */
+                runtime_dir = settle_runtime_dir(geteuid() == 0 ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER);
+                if (!runtime_dir)
+                        return log_oom_debug();
 
                 r = mode_to_inaccessible_node(runtime_dir, target.st_mode, &inaccessible);
                 if (r < 0)
@@ -1523,7 +1543,7 @@ static int apply_one_mount(
                 break;
 
         case PRIVATE_DEV:
-                return mount_private_dev(m);
+                return mount_private_dev(m, scope);
 
         case BIND_DEV:
                 return mount_bind_dev(m);
@@ -1824,6 +1844,7 @@ static int apply_mounts(
                 const NamespaceInfo *ns_info,
                 MountEntry *mounts,
                 size_t *n_mounts,
+                RuntimeScope scope,
                 char **symlinks,
                 char **error_path) {
 
@@ -1875,7 +1896,7 @@ static int apply_mounts(
                                 break;
                         }
 
-                        r = apply_one_mount(root, m, mount_image_policy, extension_image_policy, ns_info);
+                        r = apply_one_mount(root, m, mount_image_policy, extension_image_policy, ns_info, scope);
                         if (r < 0) {
                                 if (error_path && mount_entry_path(m))
                                         *error_path = strdup(mount_entry_path(m));
@@ -2030,6 +2051,7 @@ int setup_namespace(
                 const char *extension_dir,
                 const char *notify_socket,
                 const char *host_os_release_stage,
+                RuntimeScope scope,
                 char **error_path) {
 
         _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
@@ -2490,7 +2512,14 @@ int setup_namespace(
                 (void) base_filesystem_create(root, UID_INVALID, GID_INVALID);
 
         /* Now make the magic happen */
-        r = apply_mounts(root, mount_image_policy, extension_image_policy, ns_info, mounts, &n_mounts, symlinks, error_path);
+        r = apply_mounts(root,
+                         mount_image_policy,
+                         extension_image_policy,
+                         ns_info,
+                         mounts, &n_mounts,
+                         scope,
+                         symlinks,
+                         error_path);
         if (r < 0)
                 goto finish;
 
index b6132154c5132ec0c98d83b7b3c3e82e4b1f1ba8..581403d89826d577ea0d9335d42714e3d7380277 100644 (file)
@@ -16,6 +16,7 @@ typedef struct MountImage MountImage;
 #include "fs-util.h"
 #include "macro.h"
 #include "namespace-util.h"
+#include "runtime-scope.h"
 #include "string-util.h"
 
 typedef enum ProtectHome {
@@ -134,6 +135,7 @@ int setup_namespace(
                 const char *extension_dir,
                 const char *notify_socket,
                 const char *host_os_release_stage,
+                RuntimeScope scope,
                 char **error_path);
 
 #define RUN_SYSTEMD_EMPTY "/run/systemd/empty"
index 25aafc35ca837dda241061be465256c6d6325439..42ac65d08c87abaf0ea5458c6549c04a34153603 100644 (file)
@@ -206,6 +206,7 @@ TEST(protect_kernel_logs) {
                                     NULL,
                                     NULL,
                                     NULL,
+                                    RUNTIME_SCOPE_SYSTEM,
                                     NULL);
                 assert_se(r == 0);
 
index 77afd2f6b9eb81f1c23701fc153674c04a26647b..eb3afed9e1c660813b67949ae50a62cf08408b2a 100644 (file)
@@ -108,6 +108,7 @@ int main(int argc, char *argv[]) {
                             NULL,
                             NULL,
                             NULL,
+                            RUNTIME_SCOPE_SYSTEM,
                             NULL);
         if (r < 0) {
                 log_error_errno(r, "Failed to set up namespace: %m");