From: Lennart Poettering Date: Fri, 14 Aug 2020 13:54:48 +0000 (+0200) Subject: core: hide /run/credentials whenever namespacing is requested X-Git-Tag: v247-rc1~362^2~6 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=bbb4e7f39f2c68c719c26c2c65f8b7b91b009e92;p=thirdparty%2Fsystemd.git core: hide /run/credentials whenever namespacing is requested Ideally we would like to hide all other services' credentials for all services. That would require us to enable mount namespacing for all services, which is something we cannot do, both due to compatibility with the status quo ante, and because a number of services legitimately should be able to install mounts in the host hierarchy. Hence we do the second-best thing: we hide the credentials automatically for all services that opt into mount namespacing otherwise. This is quite different from other mount sandboxing options: usually you have to explicitly opt into each. However, given that the credentials logic is a brand new concept we invented right here and now, and particularly security-sensitive, it's OK to reverse this, and by default hide credentials whenever we can (i.e. whenever mount namespacing is otherwise opted into). Long story short: if you want to hide other services' credentials, the most basic option is to just turn on PrivateMounts= and there you go, they should all be gone. 
--- diff --git a/src/core/execute.c b/src/core/execute.c index 81829007b4a..aede50c5fe1 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -3056,6 +3056,7 @@ static int apply_mount_namespace( _cleanup_strv_free_ char **empty_directories = NULL; const char *tmp_dir = NULL, *var_tmp_dir = NULL; const char *root_dir = NULL, *root_image = NULL; + _cleanup_free_ char *creds_path = NULL; NamespaceInfo ns_info; bool needs_sandboxing; BindMount *bind_mounts = NULL; @@ -3124,6 +3125,12 @@ static int apply_mount_namespace( if (context->mount_flags == MS_SHARED) log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring"); + if (exec_context_has_credentials(context) && params->prefix[EXEC_DIRECTORY_RUNTIME]) { + creds_path = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials", u->id); + if (!creds_path) + return -ENOMEM; + } + r = setup_namespace(root_dir, root_image, context->root_image_options, &ns_info, context->read_write_paths, needs_sandboxing ? context->read_only_paths : NULL, @@ -3137,6 +3144,7 @@ static int apply_mount_namespace( context->n_mount_images, tmp_dir, var_tmp_dir, + creds_path, context->log_namespace, context->mount_flags, context->root_hash, context->root_hash_size, context->root_hash_path, diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index d0b707f3c5a..cc43bcdc7b6 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -537,6 +537,9 @@ int mount_setup(bool loaded_policy, bool leave_propagation) { (void) mkdir_label("/run/systemd", 0755); (void) mkdir_label("/run/systemd/system", 0755); + /* Make sure we have a mount point to hide in sandboxes */ + (void) mkdir_label("/run/credentials", 0755); + /* Also create /run/systemd/inaccessible nodes, so that we always have something to mount * inaccessible nodes from. If we run in a container the host might have created these for us already * in /run/host/inaccessible/. 
Use those if we can, since tht way we likely get access to block/char diff --git a/src/core/namespace.c b/src/core/namespace.c index 1f78d66a347..5a5095ee0a5 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -1270,6 +1270,7 @@ static size_t namespace_calculate_mounts( size_t n_mount_images, const char* tmp_dir, const char* var_tmp_dir, + const char *creds_path, const char* log_namespace) { size_t protect_home_cnt; @@ -1305,6 +1306,7 @@ static size_t namespace_calculate_mounts( protect_home_cnt + protect_system_cnt + (ns_info->protect_hostname ? 2 : 0) + (namespace_info_mount_apivfs(ns_info) ? ELEMENTSOF(apivfs_table) : 0) + + (creds_path ? 2 : 1) + !!log_namespace; } @@ -1389,6 +1391,7 @@ int setup_namespace( size_t n_mount_images, const char* tmp_dir, const char* var_tmp_dir, + const char *creds_path, const char *log_namespace, unsigned long mount_flags, const void *root_hash, @@ -1494,6 +1497,7 @@ int setup_namespace( n_temporary_filesystems, n_mount_images, tmp_dir, var_tmp_dir, + creds_path, log_namespace); if (n_mounts > 0) { @@ -1619,6 +1623,35 @@ int setup_namespace( }; } + if (creds_path) { + /* If our service has a credentials store configured, then bind that one in, but hide + * everything else. */ + + *(m++) = (MountEntry) { + .path_const = "/run/credentials", + .mode = TMPFS, + .read_only = true, + .options_const = "mode=0755" TMPFS_LIMITS_EMPTY_OR_ALMOST, + .flags = MS_NODEV|MS_STRICTATIME|MS_NOSUID|MS_NOEXEC, + }; + + *(m++) = (MountEntry) { + .path_const = creds_path, + .mode = BIND_MOUNT, + .read_only = true, + .source_const = creds_path, + }; + } else { + /* If our service has no credentials store configured, then make the whole + * credentials tree inaccessible wholesale. 
*/ + + *(m++) = (MountEntry) { + .path_const = "/run/credentials", + .mode = INACCESSIBLE, + .ignore = true, + }; + } + if (log_namespace) { _cleanup_free_ char *q; diff --git a/src/core/namespace.h b/src/core/namespace.h index e682eae7942..13cc0e80cb0 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -117,6 +117,7 @@ int setup_namespace( size_t n_mount_images, const char *tmp_dir, const char *var_tmp_dir, + const char *creds_path, const char *log_namespace, unsigned long mount_flags, const void *root_hash, diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c index af48e696684..39722073298 100644 --- a/src/test/test-namespace.c +++ b/src/test/test-namespace.c @@ -163,6 +163,7 @@ static void test_protect_kernel_logs(void) { NULL, NULL, NULL, + NULL, 0, NULL, 0, diff --git a/src/test/test-ns.c b/src/test/test-ns.c index 29f6dc5e1f1..5d7931f619e 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -78,6 +78,7 @@ int main(int argc, char *argv[]) { tmp_dir, var_tmp_dir, NULL, + NULL, 0, NULL, 0,