From 6720e356c137d5e1b744e498858ed6f667bf5af2 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Fri, 25 Jun 2021 15:16:34 +0900 Subject: [PATCH] Revert "Revert "Mount all fs nosuid when NoNewPrivileges=yes"" This reverts commit 1753d3021564671fba3d3196a84da657d15fb632. Let's re-enable that feature now. As reported when the original commit was merged, this causes some trouble on SELinux-enabled systems. So, in the subsequent commit, the feature will be disabled when SELinux is enabled. But, anyway, this commit just re-enables that feature unconditionally. --- man/systemd.exec.xml | 7 ++++--- src/core/execute.c | 2 ++ src/core/namespace.c | 32 ++++++++++++++++++++++++++++++++ src/core/namespace.h | 1 + 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 893b56d93ad..96d18dd93bd 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -675,9 +675,10 @@ CapabilityBoundingSet=~CAP_B CAP_C SystemCallArchitectures=, SystemCallFilter=, or SystemCallLog= are specified. Note that even if this setting is overridden - by them, systemctl show shows the original value of this setting. Also see - No New - Privileges Flag. + by them, systemctl show shows the original value of this setting. In case the + service will be run in a new mount namespace anyway, all file systems are mounted with MS_NOSUID + flag. Also see + No New Privileges Flag. diff --git a/src/core/execute.c b/src/core/execute.c index 42d76a346db..dcf683f68b9 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -3190,6 +3190,8 @@ static int apply_mount_namespace( .protect_proc = context->protect_proc, .proc_subset = context->proc_subset, .private_ipc = context->private_ipc || context->ipc_namespace_path, + /* If NNP is on, we can turn on MS_NOSUID, since it won't have any effect anymore. 
*/ + .mount_nosuid = context->no_new_privileges, }; } else if (!context->dynamic_user && root_dir) /* diff --git a/src/core/namespace.c b/src/core/namespace.c index 6d77ce99674..71fc73b9d30 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -1464,6 +1464,27 @@ static int make_noexec(const MountEntry *m, char **deny_list, FILE *proc_self_mo return 0; } +static int make_nosuid(const MountEntry *m, FILE *proc_self_mountinfo) { + bool submounts = false; + int r = 0; + + assert(m); + assert(proc_self_mountinfo); + + submounts = !IN_SET(m->mode, EMPTY_DIR, TMPFS); + + if (submounts) + r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), MS_NOSUID, MS_NOSUID, NULL, proc_self_mountinfo); + else + r = bind_remount_one_with_mountinfo(mount_entry_path(m), MS_NOSUID, MS_NOSUID, proc_self_mountinfo); + if (r == -ENOENT && m->ignore) + return 0; + if (r < 0) + return log_debug_errno(r, "Failed to re-mount '%s'%s: %m", mount_entry_path(m), + submounts ? " and its submounts" : ""); + return 0; +} + static bool namespace_info_mount_apivfs(const NamespaceInfo *ns_info) { assert(ns_info); @@ -1660,6 +1681,17 @@ static int apply_mounts( } } + /* Fourth round, flip the nosuid bits without a deny list: every entry is remounted with MS_NOSUID. */ + if (ns_info->mount_nosuid) + for (MountEntry *m = mounts; m < mounts + *n_mounts; ++m) { + r = make_nosuid(m, proc_self_mountinfo); + if (r < 0) { + if (error_path && mount_entry_path(m)) + *error_path = strdup(mount_entry_path(m)); + return r; + } + } + return 1; } diff --git a/src/core/namespace.h b/src/core/namespace.h index 737d6eae8b1..c9373a4adb1 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -74,6 +74,7 @@ struct NamespaceInfo { bool mount_apivfs; bool protect_hostname; bool private_ipc; + bool mount_nosuid; ProtectHome protect_home; ProtectSystem protect_system; ProtectProc protect_proc; -- 2.47.3